diff --git a/bombsquad_server.py b/bombsquad_server.py
new file mode 100644
index 0000000..eb81a00
--- /dev/null
+++ b/bombsquad_server.py
@@ -0,0 +1,867 @@
+#!/usr/bin/env -S python3.8 -O
+# Released under the MIT License. See LICENSE for details.
+#
+"""BombSquad server manager."""
+from __future__ import annotations
+
+import json
+import os
+import signal
+import subprocess
+import sys
+import time
+from pathlib import Path
+from threading import Lock, Thread, current_thread
+from typing import TYPE_CHECKING
+
+# We make use of the bacommon and efro packages as well as site-packages
+# included with our bundled Ballistica dist, so we need to add those paths
+# before we import them.
+sys.path += [
+ str(Path(Path(__file__).parent, 'dist', 'ba_data', 'python')),
+ str(Path(Path(__file__).parent, 'dist', 'ba_data', 'python-site-packages'))
+]
+
+from bacommon.servermanager import ServerConfig, StartServerModeCommand
+from efro.dataclassio import dataclass_from_dict, dataclass_validate
+from efro.error import CleanError
+from efro.terminal import Clr
+
+if TYPE_CHECKING:
+ from typing import Optional, List, Dict, Union, Tuple
+ from types import FrameType
+ from bacommon.servermanager import ServerCommand
+
+VERSION_STR = '1.3'
+
+# Version history:
+# 1.3:
+# Added show_tutorial config option
+# Added team_names config option
+# Added team_colors config option
+# Added playlist_inline config option
+# 1.2:
+# Added optional --help arg
+# Added --config arg for specifying config file and --root for ba_root path
+# Added noninteractive mode and --interactive/--noninteractive args to
+# explicitly enable/disable it (it is autodetected by default)
+# Added explicit control for auto-restart: --no-auto-restart
+# Config file is now reloaded each time server binary is restarted; no more
+# need to bring down server wrapper to pick up changes
+# Now automatically restarts server binary when config file is modified
+# (use --no-config-auto-restart to disable that behavior)
+# 1.1.1:
+# Switched config reading to use efro.dataclasses.dataclass_from_dict()
+# 1.1.0:
+# Added shutdown command
+# Changed restart to default to immediate=True
+# Added clean_exit_minutes, unclean_exit_minutes, and idle_exit_minutes
+# 1.0.0:
+# Initial release
+
+
+class ServerManagerApp:
+ """An app which manages BombSquad server execution.
+
+ Handles configuring, launching, re-launching, and otherwise
+ managing BombSquad operating in server mode.
+ """
+
+ # How many seconds we wait after asking our subprocess to do an immediate
+ # shutdown before bringing down the hammer.
+ IMMEDIATE_SHUTDOWN_TIME_LIMIT = 5.0
+
+ def __init__(self) -> None:
+ self._config_path = 'config.yaml'
+ self._user_provided_config_path = False
+ self._config = ServerConfig()
+ self._ba_root_path = os.path.abspath('dist/ba_root')
+ self._interactive = sys.stdin.isatty()
+ self._wrapper_shutdown_desired = False
+ self._done = False
+ self._subprocess_commands: List[Union[str, ServerCommand]] = []
+ self._subprocess_commands_lock = Lock()
+ self._subprocess_force_kill_time: Optional[float] = None
+ self._auto_restart = True
+ self._config_auto_restart = True
+ self._config_mtime: Optional[float] = None
+ self._last_config_mtime_check_time: Optional[float] = None
+ self._should_report_subprocess_error = False
+ self._running = False
+ self._interpreter_start_time: Optional[float] = None
+ self._subprocess: Optional[subprocess.Popen[bytes]] = None
+ self._subprocess_launch_time: Optional[float] = None
+ self._subprocess_sent_config_auto_restart = False
+ self._subprocess_sent_clean_exit = False
+ self._subprocess_sent_unclean_exit = False
+ self._subprocess_thread: Optional[Thread] = None
+ self._subprocess_exited_cleanly: Optional[bool] = None
+
+ # This may override the above defaults.
+ self._parse_command_line_args()
+
+ # Do an initial config-load. If the config is invalid at this point
+ # we can cleanly die (we're more lenient later on reloads).
+ self.load_config(strict=True, print_confirmation=False)
+
+ @property
+ def config(self) -> ServerConfig:
+ """The current config for the app."""
+ return self._config
+
+ @config.setter
+ def config(self, value: ServerConfig) -> None:
+ dataclass_validate(value)
+ self._config = value
+
+ def _prerun(self) -> None:
+ """Common code at the start of any run."""
+
+ # Make sure we don't call run multiple times.
+ if self._running:
+ raise RuntimeError('Already running.')
+ self._running = True
+
+ dbgstr = 'debug' if __debug__ else 'opt'
+ print(
+ f'{Clr.CYN}{Clr.BLD}BombSquad server manager {VERSION_STR}'
+ f' starting up ({dbgstr} mode)...{Clr.RST}',
+ flush=True)
+
+ # Python will handle SIGINT for us (as KeyboardInterrupt) but we
+ # need to register a SIGTERM handler so we have a chance to clean
+ # up our subprocess when someone tells us to die. (and avoid
+ # zombie processes)
+ signal.signal(signal.SIGTERM, self._handle_term_signal)
+
+ # During a run, we make the assumption that cwd is the dir
+ # containing this script, so make that so. Up until now that may
+ # not be the case (we support being called from any location).
+ os.chdir(os.path.abspath(os.path.dirname(__file__)))
+
+ # Fire off a background thread to wrangle our server binaries.
+ self._subprocess_thread = Thread(target=self._bg_thread_main)
+ self._subprocess_thread.start()
+
+ def _postrun(self) -> None:
+ """Common code at the end of any run."""
+ print(f'{Clr.CYN}Server manager shutting down...{Clr.RST}', flush=True)
+
+ assert self._subprocess_thread is not None
+ if self._subprocess_thread.is_alive():
+ print(f'{Clr.CYN}Waiting for subprocess exit...{Clr.RST}',
+ flush=True)
+
+ # Mark ourselves as shutting down and wait for the process to wrap up.
+ self._done = True
+ self._subprocess_thread.join()
+
+ # If there's a server error we should care about, exit the
+ # entire wrapper uncleanly.
+ if self._should_report_subprocess_error:
+ raise CleanError('Server subprocess exited uncleanly.')
+
+ def run(self) -> None:
+ """Do the thing."""
+ if self._interactive:
+ self._run_interactive()
+ else:
+ self._run_noninteractive()
+
+ def _run_noninteractive(self) -> None:
+ """Run the app loop to completion noninteractively."""
+ self._prerun()
+ try:
+ while True:
+ time.sleep(1.234)
+ except KeyboardInterrupt:
+ # Gracefully bow out if we kill ourself via keyboard.
+ pass
+ except SystemExit:
+ # We get this from the builtin quit(), our signal handler, etc.
+ # Need to catch this so we can clean up, otherwise we'll be
+ # left in limbo with our process thread still running.
+ pass
+ self._postrun()
+
+ def _run_interactive(self) -> None:
+ """Run the app loop to completion interactively."""
+ import code
+ self._prerun()
+
+ # Print basic usage info for interactive mode.
+ print(
+ f"{Clr.CYN}Interactive mode enabled; use the 'mgr' object"
+ f' to interact with the server.\n'
+ f"Type 'help(mgr)' for more information.{Clr.RST}",
+ flush=True)
+
+ context = {'__name__': '__console__', '__doc__': None, 'mgr': self}
+
+ # Enable tab-completion if possible.
+ self._enable_tab_completion(context)
+
+ # Now just sit in an interpreter.
+ # TODO: make it possible to use IPython if the user has it available.
+ try:
+ self._interpreter_start_time = time.time()
+ code.interact(local=context, banner='', exitmsg='')
+ except SystemExit:
+ # We get this from the builtin quit(), our signal handler, etc.
+ # Need to catch this so we can clean up, otherwise we'll be
+ # left in limbo with our process thread still running.
+ pass
+ except BaseException as exc:
+ print(
+ f'{Clr.SRED}Unexpected interpreter exception:'
+ f' {exc} ({type(exc)}){Clr.RST}',
+ flush=True)
+
+ self._postrun()
+
+ def cmd(self, statement: str) -> None:
+ """Exec a Python command on the current running server subprocess.
+
+ Note that commands are executed asynchronously and no status or
+ return value is accessible from this manager app.
+ """
+ if not isinstance(statement, str):
+ raise TypeError(f'Expected a string arg; got {type(statement)}')
+ with self._subprocess_commands_lock:
+ self._subprocess_commands.append(statement)
+ self._block_for_command_completion()
+
+ def _block_for_command_completion(self) -> None:
+ # Ideally we'd block here until the command was run so our prompt would
+ # print after it's results. We currently don't get any response from
+ # the app so the best we can do is block until our bg thread has sent
+ # it. In the future we can perhaps add a proper 'command port'
+ # interface for proper blocking two way communication.
+ while True:
+ with self._subprocess_commands_lock:
+ if not self._subprocess_commands:
+ break
+ time.sleep(0.1)
+
+ # One last short delay so if we come out *just* as the command is sent
+ # we'll hopefully still give it enough time to process/print.
+ time.sleep(0.1)
+
+ def screenmessage(self,
+ message: str,
+ color: Optional[Tuple[float, float, float]] = None,
+ clients: Optional[List[int]] = None) -> None:
+ """Display a screen-message.
+
+ This will have no name attached and not show up in chat history.
+ They will show up in replays, however (unless clients is passed).
+ """
+ from bacommon.servermanager import ScreenMessageCommand
+ self._enqueue_server_command(
+ ScreenMessageCommand(message=message, color=color,
+ clients=clients))
+
+ def chatmessage(self,
+ message: str,
+ clients: Optional[List[int]] = None) -> None:
+ """Send a chat message from the server.
+
+ This will have the server's name attached and will be logged
+ in client chat windows, just like other chat messages.
+ """
+ from bacommon.servermanager import ChatMessageCommand
+ self._enqueue_server_command(
+ ChatMessageCommand(message=message, clients=clients))
+
+ def clientlist(self) -> None:
+ """Print a list of connected clients."""
+ from bacommon.servermanager import ClientListCommand
+ self._enqueue_server_command(ClientListCommand())
+ self._block_for_command_completion()
+
+ def kick(self, client_id: int, ban_time: Optional[int] = None) -> None:
+ """Kick the client with the provided id.
+
+ If ban_time is provided, the client will be banned for that
+ length of time in seconds. If it is None, ban duration will
+ be determined automatically. Pass 0 or a negative number for no
+ ban time.
+ """
+ from bacommon.servermanager import KickCommand
+ self._enqueue_server_command(
+ KickCommand(client_id=client_id, ban_time=ban_time))
+
+ def restart(self, immediate: bool = True) -> None:
+ """Restart the server subprocess.
+
+ By default, the current server process will exit immediately.
+ If 'immediate' is passed as False, however, it will instead exit at
+ the next clean transition point (the end of a series, etc).
+ """
+ from bacommon.servermanager import ShutdownCommand, ShutdownReason
+ self._enqueue_server_command(
+ ShutdownCommand(reason=ShutdownReason.RESTARTING,
+ immediate=immediate))
+
+ # If we're asking for an immediate restart but don't get one within
+ # the grace period, bring down the hammer.
+ if immediate:
+ self._subprocess_force_kill_time = (
+ time.time() + self.IMMEDIATE_SHUTDOWN_TIME_LIMIT)
+
+ def shutdown(self, immediate: bool = True) -> None:
+ """Shut down the server subprocess and exit the wrapper.
+
+ By default, the current server process will exit immediately.
+ If 'immediate' is passed as False, however, it will instead exit at
+ the next clean transition point (the end of a series, etc).
+ """
+ from bacommon.servermanager import ShutdownCommand, ShutdownReason
+ self._enqueue_server_command(
+ ShutdownCommand(reason=ShutdownReason.NONE, immediate=immediate))
+
+ # An explicit shutdown means we know to bail completely once this
+ # subprocess completes.
+ self._wrapper_shutdown_desired = True
+
+ # If we're asking for an immediate shutdown but don't get one within
+ # the grace period, bring down the hammer.
+ if immediate:
+ self._subprocess_force_kill_time = (
+ time.time() + self.IMMEDIATE_SHUTDOWN_TIME_LIMIT)
+
+ def _parse_command_line_args(self) -> None:
+ """Parse command line args."""
+ # pylint: disable=too-many-branches
+
+ i = 1
+ argc = len(sys.argv)
+ did_set_interactive = False
+ while i < argc:
+ arg = sys.argv[i]
+ if arg == '--help':
+ self.print_help()
+ sys.exit(0)
+ elif arg == '--config':
+ if i + 1 >= argc:
+ raise CleanError('Expected a config path as next arg.')
+ path = sys.argv[i + 1]
+ if not os.path.exists(path):
+ raise CleanError(
+ f"Supplied path does not exist: '{path}'.")
+ # We need an abs path because we may be in a different
+ # cwd currently than we will be during the run.
+ self._config_path = os.path.abspath(path)
+ self._user_provided_config_path = True
+ i += 2
+ elif arg == '--root':
+ if i + 1 >= argc:
+ raise CleanError('Expected a path as next arg.')
+ path = sys.argv[i + 1]
+ # Unlike config_path, this one doesn't have to exist now.
+ # We do however need an abs path because we may be in a
+ # different cwd currently than we will be during the run.
+ self._ba_root_path = os.path.abspath(path)
+ i += 2
+ elif arg == '--interactive':
+ if did_set_interactive:
+ raise CleanError('interactive/noninteractive can only'
+ ' be specified once.')
+ self._interactive = True
+ did_set_interactive = True
+ i += 1
+ elif arg == '--noninteractive':
+ if did_set_interactive:
+ raise CleanError('interactive/noninteractive can only'
+ ' be specified once.')
+ self._interactive = False
+ did_set_interactive = True
+ i += 1
+ elif arg == '--no-auto-restart':
+ self._auto_restart = False
+ i += 1
+ elif arg == '--no-config-auto-restart':
+ self._config_auto_restart = False
+ i += 1
+ else:
+ raise CleanError(f"Invalid arg: '{arg}'.")
+
+ @classmethod
+ def _par(cls, txt: str) -> str:
+ """Spit out a pretty paragraph for our help text."""
+ import textwrap
+ ind = ' ' * 2
+ out = textwrap.fill(txt, 80, initial_indent=ind, subsequent_indent=ind)
+ return f'{out}\n'
+
+ @classmethod
+ def print_help(cls) -> None:
+ """Print app help."""
+ filename = os.path.basename(__file__)
+ out = (
+ f'{Clr.BLD}{filename} usage:{Clr.RST}\n' + cls._par(
+ 'This script handles configuring, launching, re-launching,'
+ ' and otherwise managing BombSquad operating'
+ ' in server mode. It can be run with no arguments, but'
+ ' accepts the following optional ones:') + f'\n'
+ f'{Clr.BLD}--help:{Clr.RST}\n'
+ f' Show this help.\n'
+ f'\n'
+ f'{Clr.BLD}--config [path]{Clr.RST}\n' + cls._par(
+ 'Set the config file read by the server script. The config'
+ ' file contains most options for what kind of game to host.'
+ ' It should be in yaml format. Note that yaml is backwards'
+ ' compatible with json so you can just write json if you'
+ ' want to. If not specified, the script will look for a'
+ ' file named \'config.yaml\' in the same directory as the'
+ ' script.') + '\n'
+ f'{Clr.BLD}--root [path]{Clr.RST}\n' + cls._par(
+ 'Set the ballistica root directory. This is where the server'
+ ' binary will read and write its caches, state files,'
+ ' downloaded assets to, etc. It needs to be a writable'
+ ' directory. If not specified, the script will use the'
+ ' \'dist/ba_root\' directory relative to itself.') + '\n'
+ f'{Clr.BLD}--interactive{Clr.RST}\n'
+ f'{Clr.BLD}--noninteractive{Clr.RST}\n' + cls._par(
+ 'Specify whether the script should run interactively.'
+ ' In interactive mode, the script creates a Python interpreter'
+ ' and reads commands from stdin, allowing for live interaction'
+ ' with the server. The server script will then exit when '
+ 'end-of-file is reached in stdin. Noninteractive mode creates'
+ ' no interpreter and is more suited to being run in automated'
+ ' scenarios. By default, interactive mode will be used if'
+ ' a terminal is detected and noninteractive mode otherwise.') +
+ '\n'
+ f'{Clr.BLD}--no-auto-restart{Clr.RST}\n' +
+ cls._par('Auto-restart is enabled by default, which means the'
+ ' server manager will restart the server binary whenever'
+ ' it exits (even when uncleanly). Disabling auto-restart'
+ ' will cause the server manager to instead exit after a'
+ ' single run and also to return error codes if the'
+ ' server binary did so.') + '\n'
+ f'{Clr.BLD}--no-config-auto-restart{Clr.RST}\n' + cls._par(
+ 'By default, when auto-restart is enabled, the server binary'
+ ' will be automatically restarted if changes to the server'
+ ' config file are detected. This disables that behavior.'))
+ print(out)
+
+ def load_config(self, strict: bool, print_confirmation: bool) -> None:
+ """Load the config.
+
+ If strict is True, errors will propagate upward.
+ Otherwise, warnings will be printed and repeated attempts will be
+ made to load the config. Eventually the function will give up
+ and leave the existing config as-is.
+ """
+ retry_seconds = 3
+ maxtries = 11
+ for trynum in range(maxtries):
+ try:
+ self._config = self._load_config_from_file(
+ print_confirmation=print_confirmation)
+ return
+ except Exception as exc:
+ if strict:
+ raise CleanError(
+ f'Error loading config file:\n{exc}') from exc
+ print(f'{Clr.RED}Error loading config file:\n{exc}.{Clr.RST}',
+ flush=True)
+ if trynum == maxtries - 1:
+ print(
+ f'{Clr.RED}Max-tries reached; giving up.'
+ f' Existing config values will be used.{Clr.RST}',
+ flush=True)
+ break
+ print(
+ f'{Clr.CYN}Please correct the error.'
+ f' Will re-attempt load in {retry_seconds}'
+ f' seconds. (attempt {trynum+1} of'
+ f' {maxtries-1}).{Clr.RST}',
+ flush=True)
+
+ for _j in range(retry_seconds):
+ # If the app is trying to die, drop what we're doing.
+ if self._done:
+ return
+ time.sleep(1)
+
+ def _load_config_from_file(self, print_confirmation: bool) -> ServerConfig:
+
+ out: Optional[ServerConfig] = None
+
+ if not os.path.exists(self._config_path):
+
+ # Special case:
+ # If the user didn't specify a particular config file, allow
+ # gracefully falling back to defaults if the default one is
+ # missing.
+ if not self._user_provided_config_path:
+ if print_confirmation:
+ print(
+ f'{Clr.YLW}Default config file not found'
+ f' (\'{self._config_path}\'); using default'
+ f' settings.{Clr.RST}',
+ flush=True)
+ self._config_mtime = None
+ self._last_config_mtime_check_time = time.time()
+ return ServerConfig()
+
+ # Don't be so lenient if the user pointed us at one though.
+ raise RuntimeError(
+ f"Config file not found: '{self._config_path}'.")
+
+ import yaml
+ with open(self._config_path) as infile:
+ user_config_raw = yaml.safe_load(infile.read())
+
+ # An empty config file will yield None, and that's ok.
+ if user_config_raw is not None:
+ out = dataclass_from_dict(ServerConfig, user_config_raw)
+
+ # Update our known mod-time since we know it exists.
+ self._config_mtime = Path(self._config_path).stat().st_mtime
+ self._last_config_mtime_check_time = time.time()
+
+ # Go with defaults if we weren't able to load anything.
+ if out is None:
+ out = ServerConfig()
+
+ if print_confirmation:
+ print(f'{Clr.CYN}Valid server config file loaded.{Clr.RST}',
+ flush=True)
+ return out
+
+ def _enable_tab_completion(self, locs: Dict) -> None:
+ """Enable tab-completion on platforms where available (linux/mac)."""
+ try:
+ import readline
+ import rlcompleter
+ readline.set_completer(rlcompleter.Completer(locs).complete)
+ readline.parse_and_bind('tab:complete')
+ except ImportError:
+ # This is expected (readline doesn't exist under windows).
+ pass
+
+ def _bg_thread_main(self) -> None:
+ """Top level method run by our bg thread."""
+ while not self._done:
+ self._run_server_cycle()
+
+ def _handle_term_signal(self, sig: int, frame: FrameType) -> None:
+ """Handle signals (will always run in the main thread)."""
+ del sig, frame # Unused.
+ sys.exit(1 if self._should_report_subprocess_error else 0)
+
+ def _run_server_cycle(self) -> None:
+ """Spin up the server subprocess and run it until exit."""
+ # pylint: disable=consider-using-with
+
+ # Reload our config, and update our overall behavior based on it.
+ # We do non-strict this time to give the user repeated attempts if
+ # if they mess up while modifying the config on the fly.
+ self.load_config(strict=False, print_confirmation=True)
+
+ self._prep_subprocess_environment()
+
+ # Launch the binary and grab its stdin;
+ # we'll use this to feed it commands.
+ self._subprocess_launch_time = time.time()
+
+ # Set an environment var so the server process knows its being
+ # run under us. This causes it to ignore ctrl-c presses and other
+ # slight behavior tweaks. Hmm; should this be an argument instead?
+ os.environ['BA_SERVER_WRAPPER_MANAGED'] = '1'
+
+ print(f'{Clr.CYN}Launching server subprocess...{Clr.RST}', flush=True)
+ binary_name = ('bombsquad_headless.exe'
+ if os.name == 'nt' else './bombsquad_headless')
+ assert self._ba_root_path is not None
+ self._subprocess = None
+
+ # Launch!
+ try:
+ self._subprocess = subprocess.Popen(
+ [binary_name, '-cfgdir', self._ba_root_path],
+ stdin=subprocess.PIPE,
+ cwd='dist')
+ except Exception as exc:
+ self._subprocess_exited_cleanly = False
+ print(
+ f'{Clr.RED}Error launching server subprocess: {exc}{Clr.RST}',
+ flush=True)
+
+ # Do the thing.
+ try:
+ self._run_subprocess_until_exit()
+ except Exception as exc:
+ print(f'{Clr.RED}Error running server subprocess: {exc}{Clr.RST}',
+ flush=True)
+
+ self._kill_subprocess()
+
+ assert self._subprocess_exited_cleanly is not None
+
+ # EW: it seems that if we die before the main thread has fully started
+ # up the interpreter, its possible that it will not break out of its
+ # loop via the usual SystemExit that gets sent when we die.
+ if self._interactive:
+ while (self._interpreter_start_time is None
+ or time.time() - self._interpreter_start_time < 0.5):
+ time.sleep(0.1)
+
+ # Avoid super fast death loops.
+ if (not self._subprocess_exited_cleanly and self._auto_restart
+ and not self._done):
+ time.sleep(5.0)
+
+ # If they don't want auto-restart, we'll exit the whole wrapper.
+ # (and with an error code if things ended badly).
+ if not self._auto_restart:
+ self._wrapper_shutdown_desired = True
+ if not self._subprocess_exited_cleanly:
+ self._should_report_subprocess_error = True
+
+ self._reset_subprocess_vars()
+
+ # If we want to die completely after this subprocess has ended,
+ # tell the main thread to die.
+ if self._wrapper_shutdown_desired:
+
+ # Only do this if the main thread is not already waiting for
+ # us to die; otherwise it can lead to deadlock.
+ # (we hang in os.kill while main thread is blocked in Thread.join)
+ if not self._done:
+ self._done = True
+
+ # This should break the main thread out of its blocking
+ # interpreter call.
+ os.kill(os.getpid(), signal.SIGTERM)
+
+ def _prep_subprocess_environment(self) -> None:
+ """Write files that must exist at process launch."""
+
+ assert self._ba_root_path is not None
+ os.makedirs(self._ba_root_path, exist_ok=True)
+ cfgpath = os.path.join(self._ba_root_path, 'config.json')
+ if os.path.exists(cfgpath):
+ with open(cfgpath) as infile:
+ bincfg = json.loads(infile.read())
+ else:
+ bincfg = {}
+
+ # Some of our config values translate directly into the
+ # bombsquad config file; the rest we pass at runtime.
+ bincfg['Port'] = self._config.port
+ bincfg['Auto Balance Teams'] = self._config.auto_balance_teams
+ bincfg['Show Tutorial'] = self._config.show_tutorial
+
+ if self._config.team_names is not None:
+ bincfg['Custom Team Names'] = self._config.team_names
+ elif 'Custom Team Names' in bincfg:
+ del bincfg['Custom Team Names']
+
+ if self._config.team_colors is not None:
+ bincfg['Custom Team Colors'] = self._config.team_colors
+ elif 'Custom Team Colors' in bincfg:
+ del bincfg['Custom Team Colors']
+
+ bincfg['Idle Exit Minutes'] = self._config.idle_exit_minutes
+ with open(cfgpath, 'w') as outfile:
+ outfile.write(json.dumps(bincfg))
+
+ def _enqueue_server_command(self, command: ServerCommand) -> None:
+ """Enqueue a command to be sent to the server.
+
+ Can be called from any thread.
+ """
+ with self._subprocess_commands_lock:
+ self._subprocess_commands.append(command)
+
+ def _send_server_command(self, command: ServerCommand) -> None:
+ """Send a command to the server.
+
+ Must be called from the server process thread.
+ """
+ import pickle
+ assert current_thread() is self._subprocess_thread
+ assert self._subprocess is not None
+ assert self._subprocess.stdin is not None
+ val = repr(pickle.dumps(command))
+ assert '\n' not in val
+ execcode = (f'import ba._servermode;'
+ f' ba._servermode._cmd({val})\n').encode()
+ self._subprocess.stdin.write(execcode)
+ self._subprocess.stdin.flush()
+
+ def _run_subprocess_until_exit(self) -> None:
+ if self._subprocess is None:
+ return
+
+ assert current_thread() is self._subprocess_thread
+ assert self._subprocess.stdin is not None
+
+ # Send the initial server config which should kick things off.
+ # (but make sure its values are still valid first)
+ dataclass_validate(self._config)
+ self._send_server_command(StartServerModeCommand(self._config))
+
+ while True:
+
+ # If the app is trying to shut down, nope out immediately.
+ if self._done:
+ break
+
+ # Pass along any commands to our process.
+ with self._subprocess_commands_lock:
+ for incmd in self._subprocess_commands:
+ # If we're passing a raw string to exec, no need to wrap it
+ # in any proper structure.
+ if isinstance(incmd, str):
+ self._subprocess.stdin.write((incmd + '\n').encode())
+ self._subprocess.stdin.flush()
+ else:
+ self._send_server_command(incmd)
+ self._subprocess_commands = []
+
+ # Request restarts/shut-downs for various reasons.
+ self._request_shutdowns_or_restarts()
+
+ # If they want to force-kill our subprocess, simply exit this
+ # loop; the cleanup code will kill the process if its still
+ # alive.
+ if (self._subprocess_force_kill_time is not None
+ and time.time() > self._subprocess_force_kill_time):
+ print(
+ f'{Clr.CYN}Immediate shutdown time limit'
+ f' ({self.IMMEDIATE_SHUTDOWN_TIME_LIMIT:.1f} seconds)'
+ f' expired; force-killing subprocess...{Clr.RST}',
+ flush=True)
+ break
+
+ # Watch for the server process exiting..
+ code: Optional[int] = self._subprocess.poll()
+ if code is not None:
+
+ clr = Clr.CYN if code == 0 else Clr.RED
+ print(
+ f'{clr}Server subprocess exited'
+ f' with code {code}.{Clr.RST}',
+ flush=True)
+ self._subprocess_exited_cleanly = (code == 0)
+ break
+
+ time.sleep(0.25)
+
+ def _request_shutdowns_or_restarts(self) -> None:
+ # pylint: disable=too-many-branches
+ assert current_thread() is self._subprocess_thread
+ assert self._subprocess_launch_time is not None
+ now = time.time()
+ minutes_since_launch = (now - self._subprocess_launch_time) / 60.0
+
+ # If we're doing auto-restart with config changes, handle that.
+ if (self._auto_restart and self._config_auto_restart
+ and not self._subprocess_sent_config_auto_restart):
+ if (self._last_config_mtime_check_time is None
+ or (now - self._last_config_mtime_check_time) > 3.123):
+ self._last_config_mtime_check_time = now
+ mtime: Optional[float]
+ if os.path.isfile(self._config_path):
+ mtime = Path(self._config_path).stat().st_mtime
+ else:
+ mtime = None
+ if mtime != self._config_mtime:
+ print(
+ f'{Clr.CYN}Config-file change detected;'
+ f' requesting immediate restart.{Clr.RST}',
+ flush=True)
+ self.restart(immediate=True)
+ self._subprocess_sent_config_auto_restart = True
+
+ # Attempt clean exit if our clean-exit-time passes.
+ # (and enforce a 6 hour max if not provided)
+ clean_exit_minutes = 360.0
+ if self._config.clean_exit_minutes is not None:
+ clean_exit_minutes = min(clean_exit_minutes,
+ self._config.clean_exit_minutes)
+ if clean_exit_minutes is not None:
+ if (minutes_since_launch > clean_exit_minutes
+ and not self._subprocess_sent_clean_exit):
+ opname = 'restart' if self._auto_restart else 'shutdown'
+ print(
+ f'{Clr.CYN}clean_exit_minutes'
+ f' ({clean_exit_minutes})'
+ f' elapsed; requesting soft'
+ f' {opname}.{Clr.RST}',
+ flush=True)
+ if self._auto_restart:
+ self.restart(immediate=False)
+ else:
+ self.shutdown(immediate=False)
+ self._subprocess_sent_clean_exit = True
+
+ # Attempt unclean exit if our unclean-exit-time passes.
+ # (and enforce a 7 hour max if not provided)
+ unclean_exit_minutes = 420.0
+ if self._config.unclean_exit_minutes is not None:
+ unclean_exit_minutes = min(unclean_exit_minutes,
+ self._config.unclean_exit_minutes)
+ if unclean_exit_minutes is not None:
+ if (minutes_since_launch > unclean_exit_minutes
+ and not self._subprocess_sent_unclean_exit):
+ opname = 'restart' if self._auto_restart else 'shutdown'
+ print(
+ f'{Clr.CYN}unclean_exit_minutes'
+ f' ({unclean_exit_minutes})'
+ f' elapsed; requesting immediate'
+ f' {opname}.{Clr.RST}',
+ flush=True)
+ if self._auto_restart:
+ self.restart(immediate=True)
+ else:
+ self.shutdown(immediate=True)
+ self._subprocess_sent_unclean_exit = True
+
+ def _reset_subprocess_vars(self) -> None:
+ self._subprocess = None
+ self._subprocess_launch_time = None
+ self._subprocess_sent_config_auto_restart = False
+ self._subprocess_sent_clean_exit = False
+ self._subprocess_sent_unclean_exit = False
+ self._subprocess_force_kill_time = None
+ self._subprocess_exited_cleanly = None
+
+ def _kill_subprocess(self) -> None:
+ """End the server subprocess if it still exists."""
+ assert current_thread() is self._subprocess_thread
+ if self._subprocess is None:
+ return
+
+ print(f'{Clr.CYN}Stopping subprocess...{Clr.RST}', flush=True)
+
+ # First, ask it nicely to die and give it a moment.
+ # If that doesn't work, bring down the hammer.
+ self._subprocess.terminate()
+ try:
+ self._subprocess.wait(timeout=10)
+ self._subprocess_exited_cleanly = (
+ self._subprocess.returncode == 0)
+ except subprocess.TimeoutExpired:
+ self._subprocess_exited_cleanly = False
+ self._subprocess.kill()
+ print(f'{Clr.CYN}Subprocess stopped.{Clr.RST}', flush=True)
+
+
+def main() -> None:
+ """Run the BombSquad server manager."""
+ try:
+ ServerManagerApp().run()
+ except CleanError as exc:
+ # For clean errors, do a simple print and fail; no tracebacks/etc.
+ # Any others will bubble up and give us the usual mess.
+ exc.pretty_print()
+ sys.exit(1)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/config.yaml b/config.yaml
index 598bfe0..e1eec4f 100644
--- a/config.yaml
+++ b/config.yaml
@@ -9,7 +9,7 @@
# If true, your party will show up in the global public party list
# Otherwise it will still be joinable via LAN or connecting by IP address.
-#party_is_public: true
+party_is_public: true
# If true, all connecting clients will be authenticated through the master
# server to screen for fake account info. Generally this should always
@@ -37,7 +37,7 @@
# multiple controllers. Also, this number currently includes the server so
# generally make it 1 bigger than you need. Max-players is not currently
# exposed but I'll try to add that soon.
-#max_party_size: 6
+max_party_size: 150
# Options here are 'ffa' (free-for-all) and 'teams'
# This value is ignored if you supply a playlist_code (see below).
diff --git a/dist/DLLs/_asyncio.pyd b/dist/DLLs/_asyncio.pyd
new file mode 100644
index 0000000..a622ebf
Binary files /dev/null and b/dist/DLLs/_asyncio.pyd differ
diff --git a/dist/DLLs/_bz2.pyd b/dist/DLLs/_bz2.pyd
new file mode 100644
index 0000000..81197c5
Binary files /dev/null and b/dist/DLLs/_bz2.pyd differ
diff --git a/dist/DLLs/_ctypes.pyd b/dist/DLLs/_ctypes.pyd
new file mode 100644
index 0000000..5d56a63
Binary files /dev/null and b/dist/DLLs/_ctypes.pyd differ
diff --git a/dist/DLLs/_ctypes_test.pyd b/dist/DLLs/_ctypes_test.pyd
new file mode 100644
index 0000000..3df5a1f
Binary files /dev/null and b/dist/DLLs/_ctypes_test.pyd differ
diff --git a/dist/DLLs/_decimal.pyd b/dist/DLLs/_decimal.pyd
new file mode 100644
index 0000000..37980ba
Binary files /dev/null and b/dist/DLLs/_decimal.pyd differ
diff --git a/dist/DLLs/_elementtree.pyd b/dist/DLLs/_elementtree.pyd
new file mode 100644
index 0000000..6e6351b
Binary files /dev/null and b/dist/DLLs/_elementtree.pyd differ
diff --git a/dist/DLLs/_hashlib.pyd b/dist/DLLs/_hashlib.pyd
new file mode 100644
index 0000000..4eee523
Binary files /dev/null and b/dist/DLLs/_hashlib.pyd differ
diff --git a/dist/DLLs/_lzma.pyd b/dist/DLLs/_lzma.pyd
new file mode 100644
index 0000000..692e2fd
Binary files /dev/null and b/dist/DLLs/_lzma.pyd differ
diff --git a/dist/DLLs/_msi.pyd b/dist/DLLs/_msi.pyd
new file mode 100644
index 0000000..b92d08f
Binary files /dev/null and b/dist/DLLs/_msi.pyd differ
diff --git a/dist/DLLs/_multiprocessing.pyd b/dist/DLLs/_multiprocessing.pyd
new file mode 100644
index 0000000..b8d37f7
Binary files /dev/null and b/dist/DLLs/_multiprocessing.pyd differ
diff --git a/dist/DLLs/_overlapped.pyd b/dist/DLLs/_overlapped.pyd
new file mode 100644
index 0000000..f57b89d
Binary files /dev/null and b/dist/DLLs/_overlapped.pyd differ
diff --git a/dist/DLLs/_queue.pyd b/dist/DLLs/_queue.pyd
new file mode 100644
index 0000000..1e03d25
Binary files /dev/null and b/dist/DLLs/_queue.pyd differ
diff --git a/dist/DLLs/_socket.pyd b/dist/DLLs/_socket.pyd
new file mode 100644
index 0000000..0bfe2be
Binary files /dev/null and b/dist/DLLs/_socket.pyd differ
diff --git a/dist/DLLs/_sqlite3.pyd b/dist/DLLs/_sqlite3.pyd
new file mode 100644
index 0000000..9aa65e4
Binary files /dev/null and b/dist/DLLs/_sqlite3.pyd differ
diff --git a/dist/DLLs/_ssl.pyd b/dist/DLLs/_ssl.pyd
new file mode 100644
index 0000000..2f1373b
Binary files /dev/null and b/dist/DLLs/_ssl.pyd differ
diff --git a/dist/DLLs/_testbuffer.pyd b/dist/DLLs/_testbuffer.pyd
new file mode 100644
index 0000000..547449b
Binary files /dev/null and b/dist/DLLs/_testbuffer.pyd differ
diff --git a/dist/DLLs/_testcapi.pyd b/dist/DLLs/_testcapi.pyd
new file mode 100644
index 0000000..df7abae
Binary files /dev/null and b/dist/DLLs/_testcapi.pyd differ
diff --git a/dist/DLLs/_testconsole.pyd b/dist/DLLs/_testconsole.pyd
new file mode 100644
index 0000000..2036004
Binary files /dev/null and b/dist/DLLs/_testconsole.pyd differ
diff --git a/dist/DLLs/_testimportmultiple.pyd b/dist/DLLs/_testimportmultiple.pyd
new file mode 100644
index 0000000..22bafd1
Binary files /dev/null and b/dist/DLLs/_testimportmultiple.pyd differ
diff --git a/dist/DLLs/_testmultiphase.pyd b/dist/DLLs/_testmultiphase.pyd
new file mode 100644
index 0000000..8a0ec5f
Binary files /dev/null and b/dist/DLLs/_testmultiphase.pyd differ
diff --git a/dist/DLLs/_tkinter.pyd b/dist/DLLs/_tkinter.pyd
new file mode 100644
index 0000000..22f8131
Binary files /dev/null and b/dist/DLLs/_tkinter.pyd differ
diff --git a/dist/DLLs/libcrypto-1_1.dll b/dist/DLLs/libcrypto-1_1.dll
new file mode 100644
index 0000000..103d153
Binary files /dev/null and b/dist/DLLs/libcrypto-1_1.dll differ
diff --git a/dist/DLLs/libffi-7.dll b/dist/DLLs/libffi-7.dll
new file mode 100644
index 0000000..ab77c4d
Binary files /dev/null and b/dist/DLLs/libffi-7.dll differ
diff --git a/dist/DLLs/libssl-1_1.dll b/dist/DLLs/libssl-1_1.dll
new file mode 100644
index 0000000..110ac97
Binary files /dev/null and b/dist/DLLs/libssl-1_1.dll differ
diff --git a/dist/DLLs/pyexpat.pyd b/dist/DLLs/pyexpat.pyd
new file mode 100644
index 0000000..ed3d3ea
Binary files /dev/null and b/dist/DLLs/pyexpat.pyd differ
diff --git a/dist/DLLs/python_lib.cat b/dist/DLLs/python_lib.cat
new file mode 100644
index 0000000..11e40dd
Binary files /dev/null and b/dist/DLLs/python_lib.cat differ
diff --git a/dist/DLLs/python_tools.cat b/dist/DLLs/python_tools.cat
new file mode 100644
index 0000000..d242cdd
Binary files /dev/null and b/dist/DLLs/python_tools.cat differ
diff --git a/dist/DLLs/select.pyd b/dist/DLLs/select.pyd
new file mode 100644
index 0000000..95042bb
Binary files /dev/null and b/dist/DLLs/select.pyd differ
diff --git a/dist/DLLs/sqlite3.dll b/dist/DLLs/sqlite3.dll
new file mode 100644
index 0000000..72565ae
Binary files /dev/null and b/dist/DLLs/sqlite3.dll differ
diff --git a/dist/DLLs/sqlite3_d.dll b/dist/DLLs/sqlite3_d.dll
new file mode 100644
index 0000000..91032a3
Binary files /dev/null and b/dist/DLLs/sqlite3_d.dll differ
diff --git a/dist/DLLs/tcl86t.dll b/dist/DLLs/tcl86t.dll
new file mode 100644
index 0000000..3c78e2d
Binary files /dev/null and b/dist/DLLs/tcl86t.dll differ
diff --git a/dist/DLLs/tk86t.dll b/dist/DLLs/tk86t.dll
new file mode 100644
index 0000000..d401cf9
Binary files /dev/null and b/dist/DLLs/tk86t.dll differ
diff --git a/dist/DLLs/unicodedata.pyd b/dist/DLLs/unicodedata.pyd
new file mode 100644
index 0000000..2b975e4
Binary files /dev/null and b/dist/DLLs/unicodedata.pyd differ
diff --git a/dist/DLLs/winsound.pyd b/dist/DLLs/winsound.pyd
new file mode 100644
index 0000000..a98b1ff
Binary files /dev/null and b/dist/DLLs/winsound.pyd differ
diff --git a/dist/ba_data/python/ba/__pycache__/__init__.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/__init__.cpython-38.opt-1.pyc
index e6c7a2c..a7ede6c 100644
Binary files a/dist/ba_data/python/ba/__pycache__/__init__.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/__init__.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_account.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_account.cpython-38.opt-1.pyc
index 40a1057..6585e74 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_account.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_account.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_achievement.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_achievement.cpython-38.opt-1.pyc
index 3b3cad5..aea784d 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_achievement.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_achievement.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_activity.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_activity.cpython-38.opt-1.pyc
index 9c88dd1..abf2f0d 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_activity.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_activity.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_activitytypes.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_activitytypes.cpython-38.opt-1.pyc
index 823fc25..642d59e 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_activitytypes.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_activitytypes.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_actor.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_actor.cpython-38.opt-1.pyc
index a5c1929..e9f2dba 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_actor.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_actor.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_ads.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_ads.cpython-38.opt-1.pyc
index 2bccc88..2195f4a 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_ads.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_ads.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_analytics.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_analytics.cpython-38.opt-1.pyc
index 6d903d2..6bc87b7 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_analytics.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_analytics.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_app.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_app.cpython-38.opt-1.pyc
index 73c772b..fec4e2b 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_app.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_app.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_appconfig.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_appconfig.cpython-38.opt-1.pyc
index cb20ca5..477beb8 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_appconfig.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_appconfig.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_appdelegate.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_appdelegate.cpython-38.opt-1.pyc
index 29a33c5..a46bb9a 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_appdelegate.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_appdelegate.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_appmode.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_appmode.cpython-38.opt-1.pyc
deleted file mode 100644
index a5fc879..0000000
Binary files a/dist/ba_data/python/ba/__pycache__/_appmode.cpython-38.opt-1.pyc and /dev/null differ
diff --git a/dist/ba_data/python/ba/__pycache__/_apputils.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_apputils.cpython-38.opt-1.pyc
index 60314f2..715ca86 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_apputils.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_apputils.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_assetmanager.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_assetmanager.cpython-38.opt-1.pyc
deleted file mode 100644
index 4a0ff80..0000000
Binary files a/dist/ba_data/python/ba/__pycache__/_assetmanager.cpython-38.opt-1.pyc and /dev/null differ
diff --git a/dist/ba_data/python/ba/__pycache__/_benchmark.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_benchmark.cpython-38.opt-1.pyc
index b9fd6cd..c974e0b 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_benchmark.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_benchmark.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_campaign.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_campaign.cpython-38.opt-1.pyc
index ca42ee3..5fa974a 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_campaign.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_campaign.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_collision.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_collision.cpython-38.opt-1.pyc
index 0a6491f..13da3ff 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_collision.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_collision.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_coopgame.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_coopgame.cpython-38.opt-1.pyc
index 0ce61ae..7703dbf 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_coopgame.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_coopgame.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_coopsession.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_coopsession.cpython-38.opt-1.pyc
index ea51f22..542e59c 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_coopsession.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_coopsession.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_dependency.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_dependency.cpython-38.opt-1.pyc
index 5acb3bd..3daf6bc 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_dependency.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_dependency.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_dualteamsession.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_dualteamsession.cpython-38.opt-1.pyc
index 762edc7..856fd27 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_dualteamsession.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_dualteamsession.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_enums.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_enums.cpython-38.opt-1.pyc
index 02cf68c..58ef7df 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_enums.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_enums.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_error.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_error.cpython-38.opt-1.pyc
index 65ef5bf..c7705dc 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_error.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_error.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_freeforallsession.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_freeforallsession.cpython-38.opt-1.pyc
index a2cc867..21c6872 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_freeforallsession.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_freeforallsession.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_gameactivity.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_gameactivity.cpython-38.opt-1.pyc
index e9c2b38..af0bdee 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_gameactivity.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_gameactivity.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_gameresults.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_gameresults.cpython-38.opt-1.pyc
index 941fefa..e470267 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_gameresults.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_gameresults.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_gameutils.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_gameutils.cpython-38.opt-1.pyc
index c02a6bf..aae53fb 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_gameutils.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_gameutils.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_general.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_general.cpython-38.opt-1.pyc
index 200f786..73ca6a4 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_general.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_general.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_hooks.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_hooks.cpython-38.opt-1.pyc
index 66c8831..3a8a8ec 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_hooks.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_hooks.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_input.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_input.cpython-38.opt-1.pyc
index da117c0..9dcc15c 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_input.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_input.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_keyboard.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_keyboard.cpython-38.opt-1.pyc
index 3150e2b..21bc5b9 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_keyboard.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_keyboard.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_language.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_language.cpython-38.opt-1.pyc
index f4cbe65..48b2afe 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_language.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_language.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_level.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_level.cpython-38.opt-1.pyc
index c7882c5..540b5f4 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_level.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_level.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_lobby.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_lobby.cpython-38.opt-1.pyc
index ae75657..058bcbf 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_lobby.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_lobby.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_map.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_map.cpython-38.opt-1.pyc
index 6bff2f1..cdabbec 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_map.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_map.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_math.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_math.cpython-38.opt-1.pyc
index 4d73580..82c0c77 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_math.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_math.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_messages.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_messages.cpython-38.opt-1.pyc
index 57ae03b..c7d6de2 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_messages.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_messages.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_meta.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_meta.cpython-38.opt-1.pyc
index b0f30c7..acf1cd1 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_meta.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_meta.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_multiteamsession.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_multiteamsession.cpython-38.opt-1.pyc
index 0a828a7..b0fcffc 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_multiteamsession.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_multiteamsession.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_music.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_music.cpython-38.opt-1.pyc
index bf7bd42..5aed1f5 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_music.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_music.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_net.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_net.cpython-38.opt-1.pyc
index c7632f3..30dd100 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_net.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_net.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_nodeactor.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_nodeactor.cpython-38.opt-1.pyc
index 4e4f8a0..f880b08 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_nodeactor.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_nodeactor.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_player.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_player.cpython-38.opt-1.pyc
index b6e49c4..91e94d6 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_player.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_player.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_playlist.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_playlist.cpython-38.opt-1.pyc
index 51ea408..b4b96f4 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_playlist.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_playlist.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_plugin.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_plugin.cpython-38.opt-1.pyc
index ac87754..9faf590 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_plugin.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_plugin.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_powerup.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_powerup.cpython-38.opt-1.pyc
index 7d94125..fe2e5ae 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_powerup.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_powerup.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_profile.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_profile.cpython-38.opt-1.pyc
index 375f7c8..d0a5658 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_profile.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_profile.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_score.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_score.cpython-38.opt-1.pyc
index e9067d8..07c9a5b 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_score.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_score.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_servermode.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_servermode.cpython-38.opt-1.pyc
index 7d27974..3c72557 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_servermode.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_servermode.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_session.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_session.cpython-38.opt-1.pyc
index 10d5fe3..70aa6f6 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_session.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_session.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_settings.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_settings.cpython-38.opt-1.pyc
index e368479..0fdb74e 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_settings.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_settings.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_stats.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_stats.cpython-38.opt-1.pyc
index e731942..ed0c3a6 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_stats.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_stats.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_store.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_store.cpython-38.opt-1.pyc
index 2e9ba16..7040ab4 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_store.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_store.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_team.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_team.cpython-38.opt-1.pyc
index b1cfdcf..0dc7b96 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_team.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_team.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_teamgame.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_teamgame.cpython-38.opt-1.pyc
index 815cf9d..2b7c0b5 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_teamgame.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_teamgame.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_tips.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_tips.cpython-38.opt-1.pyc
index 363353d..e3a817d 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_tips.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_tips.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_tournament.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_tournament.cpython-38.opt-1.pyc
index 6392b3b..43fcf9c 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_tournament.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_tournament.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/_ui.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/_ui.cpython-38.opt-1.pyc
index f975cc3..0fb00e6 100644
Binary files a/dist/ba_data/python/ba/__pycache__/_ui.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/_ui.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/deprecated.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/deprecated.cpython-38.opt-1.pyc
deleted file mode 100644
index a013cc1..0000000
Binary files a/dist/ba_data/python/ba/__pycache__/deprecated.cpython-38.opt-1.pyc and /dev/null differ
diff --git a/dist/ba_data/python/ba/__pycache__/internal.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/internal.cpython-38.opt-1.pyc
index 66056c2..0c4e17c 100644
Binary files a/dist/ba_data/python/ba/__pycache__/internal.cpython-38.opt-1.pyc and b/dist/ba_data/python/ba/__pycache__/internal.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/ba/__pycache__/macmusicapp.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/macmusicapp.cpython-38.opt-1.pyc
deleted file mode 100644
index 24f5163..0000000
Binary files a/dist/ba_data/python/ba/__pycache__/macmusicapp.cpython-38.opt-1.pyc and /dev/null differ
diff --git a/dist/ba_data/python/ba/__pycache__/modutils.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/modutils.cpython-38.opt-1.pyc
deleted file mode 100644
index 52212a8..0000000
Binary files a/dist/ba_data/python/ba/__pycache__/modutils.cpython-38.opt-1.pyc and /dev/null differ
diff --git a/dist/ba_data/python/ba/__pycache__/osmusic.cpython-38.opt-1.pyc b/dist/ba_data/python/ba/__pycache__/osmusic.cpython-38.opt-1.pyc
deleted file mode 100644
index ca98907..0000000
Binary files a/dist/ba_data/python/ba/__pycache__/osmusic.cpython-38.opt-1.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/__pycache__/__init__.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/__pycache__/__init__.cpython-38.opt-1.pyc
index 7fa5e46..eccea6a 100644
Binary files a/dist/ba_data/python/bastd/__pycache__/__init__.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/__pycache__/__init__.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/__pycache__/__init__.cpython-38.pyc b/dist/ba_data/python/bastd/__pycache__/__init__.cpython-38.pyc
deleted file mode 100644
index 29e3f6c..0000000
Binary files a/dist/ba_data/python/bastd/__pycache__/__init__.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/__pycache__/appdelegate.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/__pycache__/appdelegate.cpython-38.opt-1.pyc
index 56546ac..8bc0ec0 100644
Binary files a/dist/ba_data/python/bastd/__pycache__/appdelegate.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/__pycache__/appdelegate.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/__pycache__/appdelegate.cpython-38.pyc b/dist/ba_data/python/bastd/__pycache__/appdelegate.cpython-38.pyc
deleted file mode 100644
index a0d67b6..0000000
Binary files a/dist/ba_data/python/bastd/__pycache__/appdelegate.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/__pycache__/gameutils.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/__pycache__/gameutils.cpython-38.opt-1.pyc
index 15da1c6..c40a260 100644
Binary files a/dist/ba_data/python/bastd/__pycache__/gameutils.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/__pycache__/gameutils.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/__pycache__/gameutils.cpython-38.pyc b/dist/ba_data/python/bastd/__pycache__/gameutils.cpython-38.pyc
deleted file mode 100644
index 1504bc6..0000000
Binary files a/dist/ba_data/python/bastd/__pycache__/gameutils.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/__pycache__/mainmenu.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/__pycache__/mainmenu.cpython-38.opt-1.pyc
index 198885c..c9ea8f6 100644
Binary files a/dist/ba_data/python/bastd/__pycache__/mainmenu.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/__pycache__/mainmenu.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/__pycache__/mainmenu.cpython-38.pyc b/dist/ba_data/python/bastd/__pycache__/mainmenu.cpython-38.pyc
deleted file mode 100644
index 356b77e..0000000
Binary files a/dist/ba_data/python/bastd/__pycache__/mainmenu.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/__pycache__/maps.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/__pycache__/maps.cpython-38.opt-1.pyc
index bb5b27d..cfcaf4a 100644
Binary files a/dist/ba_data/python/bastd/__pycache__/maps.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/__pycache__/maps.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/__pycache__/maps.cpython-38.pyc b/dist/ba_data/python/bastd/__pycache__/maps.cpython-38.pyc
deleted file mode 100644
index 51a339d..0000000
Binary files a/dist/ba_data/python/bastd/__pycache__/maps.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/__pycache__/stdmap.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/__pycache__/stdmap.cpython-38.opt-1.pyc
deleted file mode 100644
index cfed2ff..0000000
Binary files a/dist/ba_data/python/bastd/__pycache__/stdmap.cpython-38.opt-1.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/__pycache__/tutorial.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/__pycache__/tutorial.cpython-38.opt-1.pyc
index e2af84f..fd743e7 100644
Binary files a/dist/ba_data/python/bastd/__pycache__/tutorial.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/__pycache__/tutorial.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/__pycache__/tutorial.cpython-38.pyc b/dist/ba_data/python/bastd/__pycache__/tutorial.cpython-38.pyc
deleted file mode 100644
index b01e944..0000000
Binary files a/dist/ba_data/python/bastd/__pycache__/tutorial.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/__init__.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/__init__.cpython-38.opt-1.pyc
index c5da340..0f7315c 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/__init__.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/__init__.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/__init__.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/__init__.cpython-38.pyc
deleted file mode 100644
index 6da5c86..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/__init__.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/background.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/background.cpython-38.opt-1.pyc
index 6535a63..3ab80ec 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/background.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/background.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/background.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/background.cpython-38.pyc
deleted file mode 100644
index 56d8306..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/background.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/bomb.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/bomb.cpython-38.opt-1.pyc
index 0a61b32..f34bb1e 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/bomb.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/bomb.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/bomb.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/bomb.cpython-38.pyc
deleted file mode 100644
index f06c2c9..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/bomb.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/controlsguide.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/controlsguide.cpython-38.opt-1.pyc
index 2995a15..da7c4e9 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/controlsguide.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/controlsguide.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/controlsguide.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/controlsguide.cpython-38.pyc
deleted file mode 100644
index 93988b0..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/controlsguide.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/flag.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/flag.cpython-38.opt-1.pyc
index cf81de8..5931888 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/flag.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/flag.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/flag.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/flag.cpython-38.pyc
deleted file mode 100644
index 9242d8e..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/flag.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/image.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/image.cpython-38.opt-1.pyc
index 02cfd9c..5afc864 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/image.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/image.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/image.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/image.cpython-38.pyc
deleted file mode 100644
index 196f99a..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/image.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/onscreencountdown.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/onscreencountdown.cpython-38.opt-1.pyc
index 3b341fa..ae15f2a 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/onscreencountdown.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/onscreencountdown.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/onscreencountdown.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/onscreencountdown.cpython-38.pyc
deleted file mode 100644
index e0c7352..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/onscreencountdown.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/onscreentimer.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/onscreentimer.cpython-38.opt-1.pyc
index 4eb8a5b..5a95d23 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/onscreentimer.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/onscreentimer.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/onscreentimer.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/onscreentimer.cpython-38.pyc
deleted file mode 100644
index 95a2fe4..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/onscreentimer.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/playerspaz.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/playerspaz.cpython-38.opt-1.pyc
index f0da456..9605781 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/playerspaz.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/playerspaz.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/playerspaz.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/playerspaz.cpython-38.pyc
deleted file mode 100644
index 80127f3..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/playerspaz.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/popuptext.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/popuptext.cpython-38.opt-1.pyc
index 138f317..4eeef83 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/popuptext.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/popuptext.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/popuptext.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/popuptext.cpython-38.pyc
deleted file mode 100644
index fa3aafe..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/popuptext.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/powerupbox.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/powerupbox.cpython-38.opt-1.pyc
index 26a50a2..3b5a704 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/powerupbox.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/powerupbox.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/powerupbox.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/powerupbox.cpython-38.pyc
deleted file mode 100644
index 24108e4..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/powerupbox.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/respawnicon.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/respawnicon.cpython-38.opt-1.pyc
index 5754cbe..88b6fad 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/respawnicon.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/respawnicon.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/respawnicon.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/respawnicon.cpython-38.pyc
deleted file mode 100644
index 1b4f5aa..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/respawnicon.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/scoreboard.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/scoreboard.cpython-38.opt-1.pyc
index 49ce4b6..ac40c1b 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/scoreboard.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/scoreboard.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/scoreboard.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/scoreboard.cpython-38.pyc
deleted file mode 100644
index d61be52..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/scoreboard.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/spawner.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/spawner.cpython-38.opt-1.pyc
deleted file mode 100644
index 9a3e8b2..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/spawner.cpython-38.opt-1.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/spaz.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/spaz.cpython-38.opt-1.pyc
index c528ddb..eb307f6 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/spaz.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/spaz.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/spaz.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/spaz.cpython-38.pyc
deleted file mode 100644
index 656baaa..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/spaz.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/spazappearance.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/spazappearance.cpython-38.opt-1.pyc
index d377f0a..e5322bf 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/spazappearance.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/spazappearance.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/spazappearance.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/spazappearance.cpython-38.pyc
deleted file mode 100644
index c5d4654..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/spazappearance.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/spazbot.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/spazbot.cpython-38.opt-1.pyc
index 9768920..58d3d5b 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/spazbot.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/spazbot.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/spazbot.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/spazbot.cpython-38.pyc
deleted file mode 100644
index a14e31e..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/spazbot.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/spazfactory.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/spazfactory.cpython-38.opt-1.pyc
index 5183a92..cffc80e 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/spazfactory.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/spazfactory.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/spazfactory.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/spazfactory.cpython-38.pyc
deleted file mode 100644
index 79d19e2..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/spazfactory.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/text.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/text.cpython-38.opt-1.pyc
index 8a3ab71..fd466fe 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/text.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/text.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/text.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/text.cpython-38.pyc
deleted file mode 100644
index dff61a9..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/text.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/tipstext.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/tipstext.cpython-38.opt-1.pyc
index 60e30f1..a4a4bc1 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/tipstext.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/tipstext.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/tipstext.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/tipstext.cpython-38.pyc
deleted file mode 100644
index eb01009..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/tipstext.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/zoomtext.cpython-38.opt-1.pyc b/dist/ba_data/python/bastd/actor/__pycache__/zoomtext.cpython-38.opt-1.pyc
index 3f078f6..5de76af 100644
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/zoomtext.cpython-38.opt-1.pyc and b/dist/ba_data/python/bastd/actor/__pycache__/zoomtext.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_data/python/bastd/actor/__pycache__/zoomtext.cpython-38.pyc b/dist/ba_data/python/bastd/actor/__pycache__/zoomtext.cpython-38.pyc
deleted file mode 100644
index 7a79d5d..0000000
Binary files a/dist/ba_data/python/bastd/actor/__pycache__/zoomtext.cpython-38.pyc and /dev/null differ
diff --git a/dist/ba_root/.bsac2 b/dist/ba_root/.bsac2
new file mode 100644
index 0000000..d46c080
--- /dev/null
+++ b/dist/ba_root/.bsac2
@@ -0,0 +1 @@
+DBRdDAdZXxReVhxNFDUAQ1cAUkkAVQ9cAFhTUl9LAlAFBh9dX04OHFpWXApQUhxVVgQDFBZbShNCTRJKTExXVg1XAWJDVVlBBFoDX14NFxoJbTkBQwJaD0haFXh1DwJQAA8cDgZIGxJfUl9SRhZqA1dFBkYCAhpGWQgCXQNZR0MLBR0URE1dTFRBNFUHT11dREYWagNXRQZGAgMaRlkEAFsVfVgASF1ESxJKGhdCT0gbAFBQZFtTHFUeRUUQN0xSQgEPW1EaREtSAGFdTEEKUEtZShBeQhsaE0RbUxxcBWZDDAJRX1MXfhUAQgMPDlAdE11UB0BdX2lAC18PWlRHFQgeGyhpUAAHV0ZYEHI+QwwdD1cOXV5fGlxiCAMMHlQXVAMdBRkCOBVVXlgEDFRaUQxZQ1ttVgMBTQEfHw1KCBYdZB5GWg5XQ1VURgBLVQwTMBRZSRQZUE1DVwVFXQRHVEBdCE1Lfk1TEFxEDAAYFVMGUR5TRwYJXV1CFw9bGhQgX1EEEX1CWQJcSg8DSUZ6CVtBWFJGABtNQkMWAUVOGkZOAAxGB0RTD0JiWVkSXBoXCR5GWgdbQVVeVQtKVQxKHkgaQ0MWTgkARQNeZxVQRUgaXAkUD0lHFloOV0JRRBBfQgoaEw4NS1BgBUESMkIHWVFDCwEBGgtQS05vUwhKRAxKSRsQCFAEVWMGBVxlVwheMhVXEkgWWwcCFQtWFRpAUEEHawNXVWJWXhYbTU0TAgdQckEFXwVPew9DUUF2UEBdFRsCGRUQFEsPVVQaXlEKVwQYVw8FX2xEEV4SCFdEFwBRAR0PWQVReVpYQAAXIVlFFENaABk6WUcGFxoJDkgPERNfBUgaAllQX1kFTV1fShwGTAhYSBYNA1cJRxoTBRZRVlgAeRMYdxFMRgVlWE5TA01LDwMDUQlKFEFGXlEAFx5VXg0XFlVaBUo+Al4PQVFDCwUdCEobSF9QUQEXD1VeWkQcA1UWUW4OAUBaVQsPW1UGVgEWAFJZYV0HXk1IaV0NVxJFH2ZCXARLGENfB0R/XFJGF1JRGkRMVwl9VExfE1xoQlBcEEpIZkNbF3QKVgNUUA8IGGBeEVkOFEJEFwZRHRNMWw54T0xLVkprE1hQRlhHC11Xe1AQEF1BFF4ZTUNXBUV4BFBWWF02VlFDTUFKaRRZEXtZQQlYAlFZF0RuWlUQQhMYFFwcBE0TUEFRN2t+X1BXCl0zZH0WDRBBQiRzYzUhak4ZBVwTXlVbCU8ifnVoRUQVGl1WRQFLNFdfX15cAmkYX18XFxZSVQxEBBdTC0haFRMLGhREWFxeF0EQWBRCZ1VbAEcDRhgBT0ZZV0VKXhUARBJ7VQ0AExcISAABARtCFlAFUx9XX1MXWBRCVBEXFkNXF04ADRRcHAZRAR0PSAlOXV9rUwpSD1hWZFhbC00EGEURC0hbT1UPW1QaRF1bFlRDf1kIUlFDXmILUAhCQhpDQApJH08DQV4JAxpGXQ4WUxR/VQ9aWENfNlZRQ01BSk0UWUFcTgFHA0UDHUEFW1t6AUwGFFM2Ql0PRUIDbA5cGGpLVwVNRmFQWFsQXw1HGhMCB1ByQQVfBU95AEsUOF5EDX8JGWxFXFxGA1IaE1VUWilcFlFEBjRXWlgQXk8zQwhMRg5EX0kYMVBCTEtWRgNXAx0WR0AMWhIYWAALVkAYAkEABmkXTEAAQxMXDFYJFA9JQA1aAxhSXFZABFoDU0MQSlJWRRBIEz5bFA8OUAQBHRREWFtFdVcFXhNTYVteXBFKWXlfEAhZRlEMWUE1RAdEWghfVg1uD1pMQktLRgNTGhNVVFokThZEVU0xWlZERH8UD1cUQkEPVRF7UQVNV19AEF4AShRQV19zElgFUh8hC1deFiNCBBIWEkVRQXVIQ1kLUExIGwhRFURGXkNSQDdYGV1YDQNoXF8KWRJPQhRCRAlIAUwaXAgUD1xTF00DRBMOUVMJShIaEwIHUH9TBUoUBGYJRFoVQh99SgNaUV5QXQoZJFlcVl5cAhtNBAFPRllQXiVaABNSSH5AAEhYQx9GeFRET1dGA18aE1JFWwBXE2JDGiFAR0QFbBYARAJ5XQJaVFlLRAMMHQkeRkkUX1JRGVUEVBJFHw0NVllXO0sIBl4SDw5TBAEBGgdaTERPVwpcFUV8VU8QXwhZBARPRkhBXwdITwhVCUNHT1ddTF85SldYTVo7UglEVFUVCFEJRxoTAgdQckEFXwVPegdeQEFiRUxWAhl1TEpGAUtEDAgYFVMBSkUYXAIcdHB/ClkEE0AHQRZbAAMdFERaV0NNWwpMA2VFVUVGJlYEQhNZVggfFBRfCAJTSERXDl9CA14KWF9yV1cQUQNEXVVZVhYbTQIBU0gaQ0QNTgRPQglYRg9QXEhWEmZdQ01AHRtcAx0WR0AMWhIYUgsFSlJVEEgTEhgRRE4AQ1VyVRQbAhsJAkgbB1VZdUBTF11ZZkMMRHddRQhMFAZeEg1iCFJFQkofGwIYFRAFWg53RlVFVktqH1dDCgpfE18XDSIARA9DU0MLABgURFhbRXVXBV4TU2FbXlwRSlllRQIdUV0RRGwNCEADDw5VAR0PTA9aU0hNQVZ4C1lEWkMQXwxHBh1BBVtbdxNMEwUYNl9bQXdeQkwEWFRBGWQNWhJZQ00VCFMVVUZDCgddHV8HQg8SGABBVQZuWENcD1gaFw0CVBVEV1Jce1cEXgJTYQwNVkdFSn0TDhYkQkwEQxMXClYVGl1LWwdcSF9SW1lBS18bV1Y8DVZXWQpIEghXRBcAUQEdD1kFUXlaWEAAFzJeVBRgUwlVVQwHT0ZZUF4oSAAGQwN9WwhfRV4WMlxZQBliCFgfU0MWDQBVFVVXUgslT1JEAAMtAEUSDWcVUF9JGCFWXA8DA1AVREZDXVRXS1AUWV8QSl5aRAFPAA1aRBcGUQEBARoHWlBhXFMDTANmXl1ZRhYXMFlFQxBQVhYpQhcERUQXB1EdE1VVB0oaF19TCEoDGhNERVsGXFlfUgwKSx1QCEwGPkMNDw5VAQEBGhZIcUNNEF4IVgYBBBsQBFofelQCA01WZgtEDxVFSH1GDhF3QlcSW1lBVRIyUAVCXkZOEF8IRxoTExZRUFNKRAIOWBUDUg1QVnJdAUBIWRsIUAlWGhNVU0FXFxpXSS8nGgkHVANRTRQHTlwtVFBKTQNpV0RXRhcXKVBXFG5dEBkwWRE3DF1dFF4YTUNGFERXBB9YTlcIShZJS1MDVggUCwUFAlUVVVdSCyVPUkQAYBQNQkQXAE0TUE5QJ05ZX10cNEsDVVhHXl0LGTVZXAENVlQUXhtNQ0YURFcEH1JFWRRYW1lcQBcXHFlUFg0BVQlbFEERDVtWGA1ODg9FSENdD1tQcksSWEoPAwpUCUoUVFpWUAlcNkZBKgpOWkIBXkNbQhRYUU0TUE5QJ05ZX10cNlYJXVhRF3QKVgNUUA8IGGBeEVkOFEJEFwFNE1BOUCdOWV9dHDRLCRZ3W1hGB1gbWhEwDE1HWRFZQ1sASg9EE1hSSBYPWldDShwJTBVeQ1tYX0cDRgMBT0ZZUF4oSAAGQwN9WwhfRV4WNFZXRlBXRHYIRV1VQlUNTVdgWAAQV0FPRhdUTRQWX10CVB9EWwlXSwNfXgVeOUVQQVNbOlgFV1MKBRoJAlQdTUNGFERXBB9SRVkUWFtZXEAXFwxXUl9aXRdeFlgTWVAIAxpGXRMIVQMDXQJeX14WDlhUDwMBUQlKFFBQRABLVB5YfSBGAgMYVAFDAFUObEMAQ1UDaxNJXV8ZYhFXBV4TDgIeR1gUXnAUBUpXGCJBABZaA15HQWdYTkwJS0EPAwZIGwdVWXVAUxddWWJZBkR/QVMFWUE2VwpBFlsIHQ9IFFBbSBdbB1YIRR9HXEcJVVUMAFNUCB8UFF8IAlNIQFURQh9BWQ1cZ0tLWwNQAhQLAgICSRsWVVkvAVlUQwF9DghYEl4aM0RfTEoJTFZJGX8FShJTQxYNA1UVVUJGERBQQBReVkMRRAkPDkNFWE5TA01LGRseRkkUWW5HVl4AG00URQoHU1ZCFxlDHBpEXUYIUlQDXwdUXV4XVwVKElNDa1JVAmYfQ18XRgILBlQBQwBVDmFRAFZESGgJUFZZShwiSwNTEXhYUwFcBRQLUlQUEVcAXk8MVx5hdzJSUEFdRAMIAwgeRkkUX1JRGVsGVhlFHxUNU1pYA3IJBFoLSEBDCwAYCFYVGl1LWwdcSF9SW1lBS18bV1Y8EUsRDFAdUU0UC0xMLVhfRnkFWldYV0YXG1wDHRZWUQ14AFdDB0pqRlgFXw4UWAINcw5VExcBShtIX1BRARcBWl5WVl46SQVZVwoIXREMVR1RTRQHQVgORnBOWwlMVll1WwpSD1hWBhUIEUsCUx1BFEpaVQEDDABGFQNXAENFQlcIZl5CS0Y7VBQUCwwHAkkbB0ReMAVUVnIRXwAVXwlDeQhfRFldFRsCHRUQEFAFXVRARAYkVBhDXxdGAgEDVB1NQ1cFRXUWUENJFiJMWUEZZQ1cClJYWlAQXwhHGhMRF1xBFF4cUU8GSg9EDkZUX2oHV1NEV1U0Vg9YRUcZRhdWB15IV0YCBwZIDwACXidaVRNVH35NFlxKDXRXA1hGZkRaVFpHA0EaExMWUVBTSkQCDlgVA1INUFZyWQpeXV9QU0YDUgYBGBVTBlE7U1AEEV1jWQ1DFRIYKkxHFRFiWVkIXRhgWEEQXBQUCwYHHkdNHlVaBhBLAHcJQhQPQkQXBVMBAQEaFktRTlwcDVoJWEIaUV4EXihUQwIeUV8UXhlRURpETFcJfVRMXxNcaEJQXBBKSGZDWxdgEFcWRF4WClwTYA1OFQ5EHw8OUAQdD1kFUXlaWEAAFylYQlhWRwJRAxZlEQVRXV8KSkE3XwVZWxNIExcMShtZTlFzE1gUUh9yRVcAGTtZUAcBShEMVR1NQ1cCXhoMUElhe0QDDAMJHkZYBV5wQ1ZAARciVFQRRHddRQhMFAZeEg1iCFJFQkofGwIaFRAUSw9VVBpDXRBLGVdcBgpMbFMKWRMYaVIPDlMEHQ9IFFBbSBdbB1YIRR9SW1MCZhpXXQIdS1pXRhdVUQZKD0QTWFJIFhJWTV9XUwlcCEJuUVlGF0AoBBNZVQ0fFBRfCAJTSFlbFENfTFUDV0xyXFwQSx9pAhYNAFUVVUZDCgddHUILWBMPVwtIWhVuVENMFEBnHRsIURVERkNdVFdLTRhDQw0FVVZYEHIED0IUVGtQEwscCEobWUlKHBdRCURFfVlGAEsBV10uEVRHFF4dT1MDSg9EE1hSSBYFUVlfWFEQXBRFH0BWXQdYGFtQEAdXRxReGVFRBkoPRBNYUkgWBVFZX1hREFwURR9HVlwRWFUMAFVUCB8UFl0RQwxXAwZNE1BOUCdOWV9dHDFbA0QRclhdEVsWWl1DN1BGQgtYFUMMXgEWEUNYTl1IUFtCV0FKVAlZXxYNB1UJWxRBEQt6XFgRXjUIVQ1IQBITCxsIVhUaTlZcEFAIQ1RHekcJTVUMA09GWVBeJVoAE1JIf1sOWlhIGClXS0FYRwNREhZnXVRGCksOFAtXSBpSUjZCFQBCA2BbBVQTFwhKG0pdWlVGA1UGARgVUwFKRRhcCgp0cH8KWQQTQAdBFlsACR0UREtIThsIUAlWGhNVVFopXBZRRAY0V1pYEF5PI1keSEZDCwAdFERJSkRaV0paDldDVVRGAEsEGFIME2deREYXVlQGSg9EE1hSSBYPWldDShwCVQdRbl1FUwsbTQIBU0gaVlgFTw0EdQlDQAhfREhLRANMX0xXSBsWRFhXUhwMWhhYQk0MXVJEEA9bUwZWARYAUllsTwdLXAN7XRxcFBQLABsQFUseVVRNB1BSRAVOFQREFQNSE15CWUFEAwkdCQJIGwdSQgYZXwRBO3ViAAVUVhReHU9YA0oPVQJZfUhZAUxdfVZbCk0VGGVcUhIyWBtaE1lWDR8UFl4MEhRcGARRHRNMVgdVQVlQURd6JBQLQEVHABVVV1ILKF1SURFIMQ5fCFlHT2RTSEpGf1dCTVAFVQoWZ11URgpLDhQLUVQUEUYWRAIEGAVFVRNQUlldFEoWTF5XCk1EDAgEBx5HWBRefQYFX0ZTNEIID0IVA2cUQVRfGDZMVk5REF4IVhoTREVbBlxZX1IMCksdUAhMBj5QFExaAlQTFwxWCRQPSUANWgMYUlxWQARaA1NDEEpaVkQKTBMFFFwVBFEdE0xbDnVdTF5HAWkJX19ARBwwWxJEESwKS19XEUoJFRYwRFcVXkNUGlwLCAEbUwpdFBQLTxVRB0tVDEUREV0fFAVbDEMMVgMBTRNYQEoAGwIdFwJUDEoUUlZTEF9NBUNUT0ZZXVoAQUNbBFYBFghcVw8CVxcIARtTCBtcQkNBUh5HWBlaVRBGAgYaRk4VFxRcWUYUVB0PTA0bAktYXhdcGxoTREVbBlxZX1IMCksdUAhMBj5GDkRYCEFBRFYDShoXDQJUFURGQ11UV0taH1dDAgdMVkQXAxEITg9IFlsABR0IShtIX1BRARcPVV5aRBwDVRZRbhMLVFJYAA9bVQZWARYAVXJCVRZVXVlccSYbXFBQWERXSRsWVVkvAVlUQwF9DghYEl4aLl9CQVkTXlBZGWUNQwdEVRYNA1AVVUZeFAFKYVcKRggPUTZCXQ9FQgNMFFZIRUACBhtcBB0WVlENdRJXVhYBaFxfClkST3oHXkBBYkVMVgIZb0RDUxZdRAwFBBsQFUseVVRNDVtcWBcDBw1XAXJXAF9QSVlEAwwdCR5GSRRfUlEZUQ1YBVdSFwFKQBgPQwgGXhJyWRMTCxwIVgkUD1hRDHUDV1ZBUmIKUBlCQk03TUNTFg0sBFEHDWQUX1JFGlwIDQEbQhZQBVMfV19TF1gUQlQRFxZeUwgPW1QGVgEWAFJZbE8HS1wDcFxEeglYRUZYXkcDQhoTExZRUFNKSgAMUxUDRw9eRk9ZClVnS1BVDE05W0MWDQpVCVsUUAAMdFZXA1gEMVkPQ0ASH35DSwpYTUpRRkR+CVITDgQCSRsWVVkvAVlUQwF9DghYEl4aM15eRlEDGX5CVkYGWApaEWJeURFWBU8TWVUIHxQUXwgCU0hOXABDUE5MA0tLA05bHlgUUhMOBgRVCVsUX0FeGn5ZAEkEE0VKDVcJVFJGGAlMTA1bUwhVD0VFXVRTS1cSQhEFC0oTWRRID0xFCVhGAlQRT1cLW0tcTFMAGRJZXlhEEkMZBFVDChRMQBg4Q10DRFhxWideXUFXERl6QlRQN0gTV1UUWFxFfxZVVAELV1gMREsAAlMEQlsKH1JCVUl7V0BbYRVMB1J2VVpXOVdLVENdOFZ1WRFDBUFXRk9BBhFeXxgOWE5IGVNESBNTQkBeXQsGV3NcAg1UE0URXREORBJtUhNeVEBUD1dfA1dXEBtKFEFGXlEAFx5VXg0XFlVaBUo+El8ISlURXkNIGlwNCB0VEAVaDnpUVVBHAGkYX18XFxZxWQtAQSZZA14UFVlUDXwfV1lAUEYBG1wHBBgVUwZRO1NQBBFdY1kNQxUSGDJjYEFlVF9KCUsaFwoCSBsWRFhXUhwMWhhYQk0XSFpSAV9DWwFWHRhDQUNEWwMXUU5WXBcXAFpQU2hRH1wUXm4RAUhGVAhEAkMMUh0ETRNBX1EFXBZEWl0KSkhQVFBYQAQbTQABU0gaQ0QNTgRPXwVCWhIfV0FZAWZbRVBcBRtcAgEEGxAVSx5VVE0NW1xYFwMHDVcBclMEQ1xMVh8bAhkJAkgbFVddUUQQX0IKGhMTFlFQU0pEAg5YFQNcBF1cSExEAwsfCQJIGwdVWXhSUwJMEmZeCgpMQBg3RQATXwhKFAhCEW5ZFFBWShsIVQxKFFJbWUYMVwJTQiwCXkBTEA9bURpEWV0CWlRZS1d4VUJMXBAbXAMBGBVCF1AUUx8KB1ddRUpLDQBROUdVEVBfDwJSCQgBG0IWUAVTH1dfUxdYFEJUERcWQFgFRgQSXgdJWxYTCxsIVhUaTFpaJU4HRFUaZ0AKGSVDXwIWV0ZYAA03CFUSQkYYEwsbFERYW0V1VwVeE1NhW15cEUpZZF4MD1FWFiJCDhVUB0FYQWJZWEwJTEwPAwNRFURCWFdcVxFKQndcDBFWRxReG1RRBkoPVQJZfUhZAUxdfVZbCk0VGHdYVkUJXARFETUNW0dZFlRDWwdTARYSQV5DSwlLbERaWQFNFRQLBQIeR0kFX1IGSlFQWQpeTwRPA09VDV0TFwpTCRQPWFYXC0hbWFp7cTZaFlpUQV4IHQJIDwACXipIVQZEVH1XD1dMXhd/DVcDFnZVWlcWG00HAU9GSEFfB0hPCFUJQ0dPV11MXzlMWUgbCFAJVhoTREVbBlxZVVkCFllQQgFfEk9VH09bE1YTFwpTCQgBG0IWVjZZRlFFYARXHF9fBCZXXEUQD1tQBkoPVQJZfUhZAUxdfVZbCk0VGHVBVl5Fbh5TXQcNVlQUXhxRTRQWX10CVB9OUAdLWU5NVxZKSFReWlJBRwNABgFPRllXRUpACA96JWRaFVRDW1kKGwIeDwJIGwdVWXVAUxddWWNTBhYYdVkLWQMAWgoNYghSRUJKHxsCFRUQBVoOd0ZVRVZLfhhaVUMpUV1TFg9bVhpEXUYIUlQDUQVWVl4XURZWEVgTDgEGVQlbFFAADHlEVxZJTzVTB0AUMV1QVF0UGwIfCR5GWAVCEw5RUwlKEhoTExZRUFNKRAIOWBUDUg1QVnJTE05ZRE0QXg1WBh0WVlENeABXQwdKd11FCEwUBl4SDXkAQkVISkQDDAEbUwdRKlNQU0JXNVYeWEUQSm1RUxYNJw5ZEk9VDV0RflATTVdYTRBeDVYaE1VUWiROFkRVTTB2ZxYwSBMTWRQPDlYdE0xbDnVdTF5HAWkJX19ARBwiVhtSES4NVlZERhdSURpETFcJfVRMXxNcaEJQXBBKSHlfR1tTEF4fQhEuBUtHUxYPW1AGSg9VBUIfQFEIdXsPAwJKCUoUQUZeUQAXAkZWEQVcVkVKXRMOFFwcBFEBHQ9ZBVF0SFhVEVw2WVhaQ0FLcBkWcgwKTEFZCA9bVBpETFcJfVRMXxNcaEJQXBBKSHpQR0MSNk0WWFVDI1dXFF4YUU0UFl9dAlQfRFsJV0sDX14FXjlfRVVbS0cDQwYBT0ZIQV8HSE8IVQlDR09XXUxfOVhNXk1ABVUPVxMOAwJVFVVEQRMDGgkHShpNQ1cFRXUWUENJFjZLVw17XRxcFBQLARsQBFofd0YCFlwdZAtCCghTRmtbDkVTTFQKGW5EWkYLSx8UCwEbEBVLHlVUTQNZXlMXAxUARAFIQD5BQ0xbElBbSBsIUwxWGhNVVFokThZEVU0rVkBaBVgGCUJGel0bUENJGlwPFA9fQA1cCFJlRk5mDFocU0UQRgIHBlQBQwBSFQNZCF99bmsFWFRIGwhUF1caE0RFWwZcWVFQDgFLHVsBWQQORDleXA5GVF8aXAwIHRUQF1ABWHhaY1sGUhJCQkFeYwIGSBxUTQdeAQZRHQMfFFQKFB8NHlYMOxoTVVRaJE4WRFVNK1ZAWgVYBglCRmpbBRMLFBREWFxeF18FQSp1eFpDVxdPFloTWVYMAxpGXRMIVQMDVwlQQ0xbElxKXhdZFlYIXRMOBQJVFVVGQwoHXR1fB0IPEhgfRFoYUF9KGlwKCB0VEAVaDndGVUVWS3UWRUVDN0xSWAANNghMB19QQwsAHBREWFtFdVcFXhNTYVteXBFKWWNTBhYYYUMKTBMOQwhJFDdYUllXFEAaFwsCSBsWRFhXUhwMWhhYQk0CVFJRO0wTBlMIWV0PUBMXDFYJFA9YUQx4EVdDUBlgEFcWRF4WClwTYQ1XABNSRBcCHB0TQFEVWmpIWFYyWApFA2dDUxFcVQwDT0ZVWkUHfwQAUjBMWBIDExdDRFVRQ1JXAHgFVV5BWUYWC1UMahhGVhEMRn4EE0ADXwNUAAIcDkQVGkkbCEZlE1MBBwdhAEsBU0NUUQkAB1IPTUNfAg8OQ0FTAHUKDWx4e3cteDNGdWNQRgFoJVJpIg9QdGcxawQ2Wg17bDcFf3hUBW5rfEloNWs0dGRDcVFHRCoaExcPChEMPw8VUxstQQxQaXMcDitsQElvWi5vNHAEZ2MDK3dVax1BCFFdXQFJIAJVCVhaFUITF2NEZU1ICQFUagNER1FFBVAIRAcHQTkUEUIPD1tDQg1MGSpdCRxgJAgOYGgPWRtKFENRRF0JTxJScAAHV0ZYEGQlQwxEXVZMeHcZFTBsCFVzc1kEREsdFlBTCFwkU0MVDVtWdwdFCARAA0BRD0VCDwI9G35fXFdEdQlXVVFFEDhECg==
\ No newline at end of file
diff --git a/dist/ba_root/.bsuuid b/dist/ba_root/.bsuuid
new file mode 100644
index 0000000..ad6d00d
--- /dev/null
+++ b/dist/ba_root/.bsuuid
@@ -0,0 +1 @@
+61cd836d-aa6f-4a11-8f98-92d9f61472e9
\ No newline at end of file
diff --git a/dist/ba_root/config.json b/dist/ba_root/config.json
index b099ee4..8d8fa4a 100644
--- a/dist/ba_root/config.json
+++ b/dist/ba_root/config.json
@@ -129,14 +129,22 @@
"Campaigns": {},
"Default Player Profiles": {
"Client Input Device #1": "__account__",
+ "Client Input Device #10": "__account__",
"Client Input Device #2": "__account__",
- "Client Input Device #3": "__account__"
+ "Client Input Device #3": "__account__",
+ "Client Input Device #5": "__account__",
+ "Client Input Device #6": "__account__",
+ "Client Input Device #7": "__account__",
+ "Client Input Device #8": "AARAV SINGH",
+ "Client Input Device #9": "__account__"
},
+ "Free-for-All Max Players": 20,
"Free-for-All Playlist Randomize": true,
"Free-for-All Playlist Selection": "__default__",
"Free-for-All Playlists": {},
"Idle Exit Minutes": null,
- "Local Account Name": "Server137026",
+ "Local Account Name": "Server751316",
+ "PSTR": 0,
"Player Profiles": {
"__account__": {
"character": "Spaz",
@@ -153,15 +161,37 @@
}
},
"Plugins": {
- "privateserver.private": {
+ "characters_duplicate.unlock_characters": {
"enabled": true
}
},
"Port": 43210,
+ "Region Pings": {
+ "af-south-1": 332.0533000000001,
+ "ap-northeast-1": 153.0722999999998,
+ "ap-northeast-2": 177.3448999999996,
+ "ap-south-1": 44.40215280000021,
+ "ap-southeast-1": 154.81258440000002,
+ "ap-southeast-2": 177.44980000000066,
+ "ca-central-1": 237.2696999999997,
+ "eu-central-1": 161.99570000000028,
+ "eu-north-1": 173.3184000000003,
+ "eu-south-1": 150.99719999999994,
+ "eu-west-1": 174.4621000000004,
+ "eu-west-2": 172.11209999999966,
+ "eu-west-3": 160.90859999999995,
+ "me-south-1": 74.56136400000022,
+ "sa-east-1": 360.89020000000005,
+ "us-east-1": 235.8709999999995,
+ "us-east-2": 249.04710000000028,
+ "us-west-1": 270.4567000000004,
+ "us-west-2": 291.1767000000012
+ },
"Show Tutorial": false,
"Signed In Last Session": false,
+ "Team Game Max Players": 20,
"Team Tournament Playlists": {},
- "launchCount": 24,
+ "launchCount": 82,
"lc14173": 1,
"lc14292": 1
}
\ No newline at end of file
diff --git a/dist/ba_root/config.json.prev b/dist/ba_root/config.json.prev
index 0844937..bc40dd8 100644
--- a/dist/ba_root/config.json.prev
+++ b/dist/ba_root/config.json.prev
@@ -1 +1,197 @@
-{"Achievements": {"Boom Goes the Dynamite": {"Complete": false}, "Boxer": {"Complete": false}, "Dual Wielding": {"Complete": false}, "Flawless Victory": {"Complete": false}, "Free Loader": {"Complete": true}, "Gold Miner": {"Complete": false}, "Got the Moves": {"Complete": false}, "In Control": {"Complete": false}, "Last Stand God": {"Complete": false}, "Last Stand Master": {"Complete": false}, "Last Stand Wizard": {"Complete": false}, "Mine Games": {"Complete": false}, "Off You Go Then": {"Complete": false}, "Onslaught God": {"Complete": false}, "Onslaught Master": {"Complete": false}, "Onslaught Training Victory": {"Complete": false}, "Onslaught Wizard": {"Complete": false}, "Precision Bombing": {"Complete": false}, "Pro Boxer": {"Complete": false}, "Pro Football Shutout": {"Complete": false}, "Pro Football Victory": {"Complete": false}, "Pro Onslaught Victory": {"Complete": false}, "Pro Runaround Victory": {"Complete": false}, "Rookie Football Shutout": {"Complete": false}, "Rookie Football Victory": {"Complete": false}, "Rookie Onslaught Victory": {"Complete": false}, "Runaround God": {"Complete": false}, "Runaround Master": {"Complete": false}, "Runaround Wizard": {"Complete": false}, "Sharing is Caring": {"Complete": false}, "Stayin' Alive": {"Complete": false}, "Super Mega Punch": {"Complete": false}, "Super Punch": {"Complete": false}, "TNT Terror": {"Complete": false}, "Team Player": {"Complete": false}, "The Great Wall": {"Complete": false}, "The Wall": {"Complete": false}, "Uber Football Shutout": {"Complete": false}, "Uber Football Victory": {"Complete": false}, "Uber Onslaught Victory": {"Complete": false}, "Uber Runaround Victory": {"Complete": false}}, "Auto Account State": "Server", "Auto Balance Teams": true, "Campaigns": {}, "Default Player Profiles": {"Client Input Device #1": "__account__", "Client Input Device #2": "__account__", "Client Input Device #3": "__account__"}, "Free-for-All Playlist Randomize": true, "Free-for-All Playlist Selection": "__default__", "Free-for-All Playlists": {}, "Idle Exit Minutes": null, "Local Account Name": "Server137026", "Player Profiles": {"__account__": {"character": "Spaz", "color": [0.5, 0.25, 1.0], "highlight": [0.5, 0.25, 1.0]}}, "Plugins": {"privateserver.private": {"enabled": true}}, "Port": 43210, "Show Tutorial": false, "Signed In Last Session": false, "Team Tournament Playlists": {}, "launchCount": 23, "lc14173": 1, "lc14292": 1}
\ No newline at end of file
+{
+ "Achievements": {
+ "Boom Goes the Dynamite": {
+ "Complete": false
+ },
+ "Boxer": {
+ "Complete": false
+ },
+ "Dual Wielding": {
+ "Complete": false
+ },
+ "Flawless Victory": {
+ "Complete": false
+ },
+ "Free Loader": {
+ "Complete": true
+ },
+ "Gold Miner": {
+ "Complete": false
+ },
+ "Got the Moves": {
+ "Complete": false
+ },
+ "In Control": {
+ "Complete": false
+ },
+ "Last Stand God": {
+ "Complete": false
+ },
+ "Last Stand Master": {
+ "Complete": false
+ },
+ "Last Stand Wizard": {
+ "Complete": false
+ },
+ "Mine Games": {
+ "Complete": false
+ },
+ "Off You Go Then": {
+ "Complete": false
+ },
+ "Onslaught God": {
+ "Complete": false
+ },
+ "Onslaught Master": {
+ "Complete": false
+ },
+ "Onslaught Training Victory": {
+ "Complete": false
+ },
+ "Onslaught Wizard": {
+ "Complete": false
+ },
+ "Precision Bombing": {
+ "Complete": false
+ },
+ "Pro Boxer": {
+ "Complete": false
+ },
+ "Pro Football Shutout": {
+ "Complete": false
+ },
+ "Pro Football Victory": {
+ "Complete": false
+ },
+ "Pro Onslaught Victory": {
+ "Complete": false
+ },
+ "Pro Runaround Victory": {
+ "Complete": false
+ },
+ "Rookie Football Shutout": {
+ "Complete": false
+ },
+ "Rookie Football Victory": {
+ "Complete": false
+ },
+ "Rookie Onslaught Victory": {
+ "Complete": false
+ },
+ "Runaround God": {
+ "Complete": false
+ },
+ "Runaround Master": {
+ "Complete": false
+ },
+ "Runaround Wizard": {
+ "Complete": false
+ },
+ "Sharing is Caring": {
+ "Complete": false
+ },
+ "Stayin' Alive": {
+ "Complete": false
+ },
+ "Super Mega Punch": {
+ "Complete": false
+ },
+ "Super Punch": {
+ "Complete": false
+ },
+ "TNT Terror": {
+ "Complete": false
+ },
+ "Team Player": {
+ "Complete": false
+ },
+ "The Great Wall": {
+ "Complete": false
+ },
+ "The Wall": {
+ "Complete": false
+ },
+ "Uber Football Shutout": {
+ "Complete": false
+ },
+ "Uber Football Victory": {
+ "Complete": false
+ },
+ "Uber Onslaught Victory": {
+ "Complete": false
+ },
+ "Uber Runaround Victory": {
+ "Complete": false
+ }
+ },
+ "Auto Account State": "Server",
+ "Auto Balance Teams": true,
+ "Campaigns": {},
+ "Default Player Profiles": {
+ "Client Input Device #1": "__account__",
+ "Client Input Device #10": "__account__",
+ "Client Input Device #2": "__account__",
+ "Client Input Device #3": "__account__",
+ "Client Input Device #5": "__account__",
+ "Client Input Device #6": "__account__",
+ "Client Input Device #7": "__account__",
+ "Client Input Device #8": "AARAV SINGH",
+ "Client Input Device #9": "__account__"
+ },
+ "Free-for-All Max Players": 20,
+ "Free-for-All Playlist Randomize": true,
+ "Free-for-All Playlist Selection": "__default__",
+ "Free-for-All Playlists": {},
+ "Idle Exit Minutes": null,
+ "Local Account Name": "Server751316",
+ "PSTR": 0,
+ "Player Profiles": {
+ "__account__": {
+ "character": "Spaz",
+ "color": [
+ 0.5,
+ 0.25,
+ 1.0
+ ],
+ "highlight": [
+ 0.5,
+ 0.25,
+ 1.0
+ ]
+ }
+ },
+ "Plugins": {
+ "characters_duplicate.unlock_characters": {
+ "enabled": true
+ }
+ },
+ "Port": 43210,
+ "Region Pings": {
+ "af-south-1": 312.64150000000154,
+ "ap-northeast-1": 153.12689879999954,
+ "ap-northeast-2": 165.05455279999813,
+ "ap-south-1": 44.22486380000026,
+ "ap-southeast-1": 88.89644679999981,
+ "ap-southeast-2": 177.04750000000047,
+ "ca-central-1": 239.786500000001,
+ "eu-central-1": 161.11233520000104,
+ "eu-north-1": 179.9086999999986,
+ "eu-south-1": 146.85989999999904,
+ "eu-west-1": 177.47380000000135,
+ "eu-west-2": 176.71720000000235,
+ "eu-west-3": 163.49899299999828,
+ "me-south-1": 76.81473880000011,
+ "sa-east-1": 348.776400000002,
+ "us-east-1": 235.8861999999995,
+ "us-east-2": 250.7388999999982,
+ "us-west-1": 266.3696999999985,
+ "us-west-2": 279.6805000000013
+ },
+ "Show Tutorial": false,
+ "Signed In Last Session": false,
+ "Team Game Max Players": 20,
+ "Team Tournament Playlists": {},
+ "launchCount": 82,
+ "lc14173": 1,
+ "lc14292": 1
+}
\ No newline at end of file
diff --git a/dist/ba_root/mods/__pycache__/characters_duplicate.cpython-38.opt-1.pyc b/dist/ba_root/mods/__pycache__/characters_duplicate.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..0331ec5
Binary files /dev/null and b/dist/ba_root/mods/__pycache__/characters_duplicate.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/__pycache__/custom_hooks.cpython-38.opt-1.pyc b/dist/ba_root/mods/__pycache__/custom_hooks.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..78d428f
Binary files /dev/null and b/dist/ba_root/mods/__pycache__/custom_hooks.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/__pycache__/setting.cpython-38.opt-1.pyc b/dist/ba_root/mods/__pycache__/setting.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..f9a4375
Binary files /dev/null and b/dist/ba_root/mods/__pycache__/setting.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/chatHandle/ChatCommands/Main.py b/dist/ba_root/mods/chatHandle/ChatCommands/Main.py
index 7a9e5e9..4f393ca 100644
--- a/dist/ba_root/mods/chatHandle/ChatCommands/Main.py
+++ b/dist/ba_root/mods/chatHandle/ChatCommands/Main.py
@@ -67,16 +67,25 @@ def Command(msg, clientid):
elif command_type(command) == "Manage":
if check_permissions(accountid, command):
Management.ExcelCommand(command, arguments, clientid, accountid)
+ _ba.screenmessage("Executed", transient=True, clients=[clientid])
+ else:
+ _ba.screenmessage("access denied", transient=True, clients=[clientid])
elif command_type(command) == "Fun":
if check_permissions(accountid, command):
Fun.ExcelCommand(command, arguments, clientid, accountid)
+ _ba.screenmessage("Executed", transient=True, clients=[clientid])
+ else:
+ _ba.screenmessage("access denied", transient=True, clients=[clientid])
elif command_type(command) == "Cheats":
if check_permissions(accountid, command):
Cheats.ExcelCommand(command, arguments, clientid, accountid)
+ _ba.screenmessage("Executed", transient=True, clients=[clientid])
+ else:
+ _ba.screenmessage("access denied", transient=True, clients=[clientid])
settings = setting.get_settings_data()
diff --git a/dist/ba_root/mods/chatHandle/ChatCommands/__pycache__/Main.cpython-38.opt-1.pyc b/dist/ba_root/mods/chatHandle/ChatCommands/__pycache__/Main.cpython-38.opt-1.pyc
index 5613991..5ebebc8 100644
Binary files a/dist/ba_root/mods/chatHandle/ChatCommands/__pycache__/Main.cpython-38.opt-1.pyc and b/dist/ba_root/mods/chatHandle/ChatCommands/__pycache__/Main.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/chatHandle/ChatCommands/__pycache__/__init__.cpython-38.opt-1.pyc b/dist/ba_root/mods/chatHandle/ChatCommands/__pycache__/__init__.cpython-38.opt-1.pyc
index e832d5c..babaa26 100644
Binary files a/dist/ba_root/mods/chatHandle/ChatCommands/__pycache__/__init__.cpython-38.opt-1.pyc and b/dist/ba_root/mods/chatHandle/ChatCommands/__pycache__/__init__.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/chatHandle/ChatCommands/__pycache__/handlers.cpython-38.opt-1.pyc b/dist/ba_root/mods/chatHandle/ChatCommands/__pycache__/handlers.cpython-38.opt-1.pyc
index d4f55a8..98eb9eb 100644
Binary files a/dist/ba_root/mods/chatHandle/ChatCommands/__pycache__/handlers.cpython-38.opt-1.pyc and b/dist/ba_root/mods/chatHandle/ChatCommands/__pycache__/handlers.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/chatHandle/ChatCommands/commands/Management.py b/dist/ba_root/mods/chatHandle/ChatCommands/commands/Management.py
index e4e67aa..f75e8d4 100644
--- a/dist/ba_root/mods/chatHandle/ChatCommands/commands/Management.py
+++ b/dist/ba_root/mods/chatHandle/ChatCommands/commands/Management.py
@@ -1,12 +1,12 @@
-from .Handlers import handlemsg, handlemsg_all
+from .Handlers import handlemsg, handlemsg_all,send
from playersData import pdata
from tools.whitelist import add_to_white_list, add_commit_to_logs
import ba, _ba, time, setting
-Commands = ['kick', 'remove', 'end', 'quit', 'mute', 'unmute', 'slowmo', 'nv', 'dv', 'pause', 'cameramode', 'createrole', 'addrole', 'removerole', 'addcommand', 'addcmd', 'removecommand', 'removecmd', 'changetag', 'add', 'spectators', 'lobbytime']
-CommandAliases = ['rm', 'next', 'restart', 'mutechat', 'unmutechat', 'sm', 'slow', 'night', 'day', 'pausegame', 'camera_mode', 'rotate_camera', 'whitelist']
+Commands = ['kick', 'remove', 'end', 'quit', 'mute', 'unmute', 'slowmo', 'nv', 'dv', 'pause', 'cameramode', 'createrole', 'addrole', 'removerole', 'addcommand', 'addcmd', 'removecommand','getroles', 'removecmd', 'changetag','customtag','customeffect','add', 'spectators', 'lobbytime']
+CommandAliases = ['rm', 'next', 'restart', 'mutechat', 'unmutechat', 'sm', 'slow', 'night', 'day', 'pausegame', 'camera_mode', 'rotate_camera', 'whitelist','effect']
@@ -64,6 +64,9 @@ def ExcelCommand(command, arguments, clientid, accountid):
elif command == 'removerole':
remove_role_from_player(arguments)
+
+ elif command=='getroles':
+ get_roles_of_player(arguments,clientid)
elif command in ['addcommand', 'addcmd']:
add_command_to_role(arguments)
@@ -74,6 +77,12 @@ def ExcelCommand(command, arguments, clientid, accountid):
elif command == 'changetag':
change_role_tag(arguments)
+ elif command=='customtag':
+ set_custom_tag(arguments)
+
+ elif command in ['customeffect','effect']:
+ set_custom_effect(arguments)
+
elif command in ['add', 'whitelist']:
whitelst_it(accountid, arguments)
@@ -88,9 +97,8 @@ def ExcelCommand(command, arguments, clientid, accountid):
-
-
def kick(arguments):
+ _ba.disconnect_client(int(arguments[0]))
return
@@ -134,7 +142,9 @@ def remove(arguments):
else:
try:
session = _ba.get_foreground_host_session()
- session.sessionplayers[int(arguments[0])].remove_from_game()
+ for i in session.sessionplayers:
+ if i.inputdevice.client_id== int(arguments[0]):
+ i.remove_from_game()
except:
return
@@ -226,10 +236,9 @@ def add_role_to_player(arguments):
try:
session = _ba.get_foreground_host_session()
-
- id = session.sessionplayers[int(arguments[1])].get_account_id()
-
- pdata.add_player_role(arguments[0], id)
+ for i in session.sessionplayers:
+ if i.inputdevice.client_id== int(arguments[1]):
+ roles=pdata.add_player_role(arguments[0],i.get_account_id())
except:
return
@@ -238,20 +247,49 @@ def add_role_to_player(arguments):
def remove_role_from_player(arguments):
try:
session = _ba.get_foreground_host_session()
-
- id = session.sessionplayers[int(arguments[1])].get_account_id()
-
- pdata.remove_player_role(arguments[0], id)
+ for i in session.sessionplayers:
+ if i.inputdevice.client_id== int(arguments[1]):
+ roles=pdata.remove_player_role(arguments[0],i.get_account_id())
+
except:
return
-
-
+def get_roles_of_player(arguments,clientid):
+ try:
+ session = _ba.get_foreground_host_session()
+ roles=[]
+ reply=""
+ for i in session.sessionplayers:
+ if i.inputdevice.client_id== int(arguments[0]):
+ roles=pdata.get_player_roles(i.get_account_id())
+ print(roles)
+ for role in roles:
+ reply=reply+role+","
+ send(reply,clientid)
+ except:
+ return
def change_role_tag(arguments):
try:
pdata.change_role_tag(arguments[0], arguments[1])
except:
return
+def set_custom_tag(arguments):
+ try:
+ session = _ba.get_foreground_host_session()
+ for i in session.sessionplayers:
+ if i.inputdevice.client_id== int(arguments[1]):
+ roles=pdata.set_tag(arguments[0],i.get_account_id())
+ except:
+ return
+def set_custom_effect(arguments):
+ try:
+ session = _ba.get_foreground_host_session()
+ for i in session.sessionplayers:
+ if i.inputdevice.client_id== int(arguments[1]):
+ roles=pdata.set_effect(arguments[0],i.get_account_id())
+ except:
+ return
+
all_commands = ["changetag","createrole", "addrole", "removerole", "addcommand", "addcmd","removecommand","removecmd","kick","remove","rm","end","next","quit","restart","mute","mutechat","unmute","unmutechat","sm","slow","slowmo","nv","night","dv","day","pause","pausegame","cameraMode","camera_mode","rotate_camera","kill","die","heal","heath","curse","cur","sleep","sp","superpunch","gloves","punch","shield","protect","freeze","ice","unfreeze","thaw","gm","godmode","fly","inv","invisible","hl","headless","creepy","creep","celebrate","celeb","spaz"]
diff --git a/dist/ba_root/mods/chatHandle/ChatCommands/commands/NormalCommands.py b/dist/ba_root/mods/chatHandle/ChatCommands/commands/NormalCommands.py
index e9147b3..73f4199 100644
--- a/dist/ba_root/mods/chatHandle/ChatCommands/commands/NormalCommands.py
+++ b/dist/ba_root/mods/chatHandle/ChatCommands/commands/NormalCommands.py
@@ -1,6 +1,6 @@
from .Handlers import send
import ba, _ba
-
+from stats import mystats
Commands = ['me', 'list', 'uniqeid']
CommandAliases = ['stats', 'score', 'rank', 'myself', 'l', 'id', 'pb-id', 'pb', 'accountid']
@@ -21,7 +21,7 @@ def ExcelCommand(command, arguments, clientid, accountid):
None
"""
if command in ['me', 'stats', 'score', 'rank', 'myself']:
- stats()
+ stats(accountid,clientid)
elif command in ['list', 'l']:
list(clientid)
@@ -33,9 +33,11 @@ def ExcelCommand(command, arguments, clientid, accountid):
-def stats():
- """ Stats """
- return
+def stats(ac_id,clientid):
+ stats=mystats.get_stats_by_id(ac_id)
+ reply="Score:"+str(stats["scores"])+"\nGames:"+str(stats["games"])+"\nKills:"+str(stats["kills"])+"\nDeaths:"+str(stats["deaths"])+"\nAvg.:"+str(stats["avg_score"])
+ send(reply,clientid)
+
@@ -53,7 +55,7 @@ def list(clientid):
for i in session.sessionplayers:
list += p.format(i.getname(icon = False),
- i.inputdevice.client_id, i.id)
+ i.inputdevice.client_id, i.id)+"\n"
send(list, clientid)
diff --git a/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/NormalCommands.cpython-38.opt-1.pyc b/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/NormalCommands.cpython-38.opt-1.pyc
index 91f97f0..42f4907 100644
Binary files a/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/NormalCommands.cpython-38.opt-1.pyc and b/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/NormalCommands.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/cheats.cpython-38.opt-1.pyc b/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/cheats.cpython-38.opt-1.pyc
index ea99d15..0ace767 100644
Binary files a/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/cheats.cpython-38.opt-1.pyc and b/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/cheats.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/fun.cpython-38.opt-1.pyc b/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/fun.cpython-38.opt-1.pyc
index 551074d..d5e6a06 100644
Binary files a/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/fun.cpython-38.opt-1.pyc and b/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/fun.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/handlers.cpython-38.opt-1.pyc b/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/handlers.cpython-38.opt-1.pyc
index 49fefad..14de924 100644
Binary files a/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/handlers.cpython-38.opt-1.pyc and b/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/handlers.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/management.cpython-38.opt-1.pyc b/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/management.cpython-38.opt-1.pyc
index 682a5fd..6d12910 100644
Binary files a/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/management.cpython-38.opt-1.pyc and b/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/management.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/normal_commands.cpython-38.opt-1.pyc b/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/normal_commands.cpython-38.opt-1.pyc
deleted file mode 100644
index 0d79fe5..0000000
Binary files a/dist/ba_root/mods/chatHandle/ChatCommands/commands/__pycache__/normal_commands.cpython-38.opt-1.pyc and /dev/null differ
diff --git a/dist/ba_root/mods/chatHandle/__pycache__/__init__.cpython-38.opt-1.pyc b/dist/ba_root/mods/chatHandle/__pycache__/__init__.cpython-38.opt-1.pyc
index 4b887b9..6d65948 100644
Binary files a/dist/ba_root/mods/chatHandle/__pycache__/__init__.cpython-38.opt-1.pyc and b/dist/ba_root/mods/chatHandle/__pycache__/__init__.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/chatHandle/__pycache__/handlechat.cpython-38.opt-1.pyc b/dist/ba_root/mods/chatHandle/__pycache__/handlechat.cpython-38.opt-1.pyc
index ceb3e53..d0d5317 100644
Binary files a/dist/ba_root/mods/chatHandle/__pycache__/handlechat.cpython-38.opt-1.pyc and b/dist/ba_root/mods/chatHandle/__pycache__/handlechat.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/custom_hooks.py b/dist/ba_root/mods/custom_hooks.py
index 5803b5f..97a009e 100644
--- a/dist/ba_root/mods/custom_hooks.py
+++ b/dist/ba_root/mods/custom_hooks.py
@@ -1,6 +1,7 @@
from chatHandle import handlechat
def filter_chat_message(msg, client_id):
+
return handlechat.filter_chat_message(msg, client_id)
diff --git a/dist/ba_root/mods/playersData/__pycache__/__init__.cpython-38.opt-1.pyc b/dist/ba_root/mods/playersData/__pycache__/__init__.cpython-38.opt-1.pyc
index 4beb611..99ccc12 100644
Binary files a/dist/ba_root/mods/playersData/__pycache__/__init__.cpython-38.opt-1.pyc and b/dist/ba_root/mods/playersData/__pycache__/__init__.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/playersData/__pycache__/pdata.cpython-38.opt-1.pyc b/dist/ba_root/mods/playersData/__pycache__/pdata.cpython-38.opt-1.pyc
index 7c62e31..77cea0b 100644
Binary files a/dist/ba_root/mods/playersData/__pycache__/pdata.cpython-38.opt-1.pyc and b/dist/ba_root/mods/playersData/__pycache__/pdata.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/playersData/custom.json b/dist/ba_root/mods/playersData/custom.json
index 46ebb63..2453437 100644
--- a/dist/ba_root/mods/playersData/custom.json
+++ b/dist/ba_root/mods/playersData/custom.json
@@ -1,10 +1,10 @@
{
- "customtag":{
- "pb-id":"smoothy",
- "pb-45":"something"
- },
- "customeffects":{
- "pb-id":"ice",
- "pb-id":"scorch"
- }
+ "customtag": {
+ "pb-id": "smoothy",
+ "pb-45": "something",
+ "pb-IF4TVWwZUQ==": "proowner"
+ },
+ "customeffects": {
+ "pb-IF4TVWwZUQ==": "spark"
+ }
}
\ No newline at end of file
diff --git a/dist/ba_root/mods/playersData/pdata.py b/dist/ba_root/mods/playersData/pdata.py
index 99f7849..d5da8e7 100644
--- a/dist/ba_root/mods/playersData/pdata.py
+++ b/dist/ba_root/mods/playersData/pdata.py
@@ -9,14 +9,60 @@ custom = {}
data_path = os.path.join(_ba.env()['python_directory_user'],"playersData" + os.sep)
+def get_info(id):
+ with open(data_path+'profiles.json', 'r') as f:
+
+ profile = json.load(f)
+ return profile[id]
+
+def add_profile(id,display_string,allprofiles,currentname):
+ f=open(data_path+"profiles.json","r")
+ profiles=json.load(f.read())
+ f.close()
+
+ profiles[id]['display_string']=[display_string]
+ profiles[id]['profiles']=allprofiles
+ profiles[id]['name']=currentname
+
+ f=open(data_path+"profiles.json","w")
+ json.dump(profiles,f,indent=4)
+ f.close()
-def commit(data):
+
+def update_profile(id,display_string=None,allprofiles=[],name=None):
+ f=open(data_path+"profiles.json","r")
+ profiles=json.load(f.read())
+ f.close()
+ if id in profiles:
+ if display_string != None and display_string not in profiles[id]['display_string']:
+ profiles[id]['display_string'].append(display_string)
+ if profiles!=[]:
+ for profile in allprofiles:
+ if profile not in profiles[id]['profiles']:
+ profiles[id]['profiles'].append(profile)
+ if name != None:
+ profiles[id]['name']=name
+
+
+ f=open(data_path+"profiles.json","w")
+ json.dump(profiles,f,indent=4)
+ f.close()
+
+
+
+
+def commit_roles(data):
global roles
if data == {}:
return
+ output=json.dumps(data,indent=4)
+ import re
+ output2 = re.sub(r'": \[\s+', '": [', output)
+ output3 = re.sub(r'",\s+', '", ', output2)
+ output4 = re.sub(r'"\s+\]', '"]', output3)
with open(data_path+'roles.json','w') as f:
- json.dump(data, f, indent=4)
+ f.write(output4)
def get_roles():
@@ -38,7 +84,7 @@ def create_role(role):
"ids":[]
}
roles = _roles
- commit(_roles)
+ commit_roles(_roles)
return
return
@@ -47,12 +93,15 @@ def add_player_role(role, id):
global roles
_roles = get_roles()
if role in _roles:
+
if id not in _roles[role]["ids"]:
_roles[role]["ids"].append(id)
roles =_roles
- commit(_roles)
- return "added to "+role
- return "role not exists"
+ commit_roles(_roles)
+
+ else:
+ print("no role such")
+
def remove_player_role(role, id):
@@ -61,7 +110,7 @@ def remove_player_role(role, id):
if role in _roles:
_roles[role]["ids"].remove(id)
roles =_roles
- commit(_roles)
+ commit_roles(_roles)
return "removed from "+role
return "role not exists"
@@ -75,7 +124,7 @@ def add_command_role(role, command):
if command not in _roles[role]["commands"]:
_roles[role]["commands"].append(command)
roles =_roles
- commit(_roles)
+ commit_roles(_roles)
return "command added to "+role
return "command not exists"
@@ -87,7 +136,7 @@ def remove_command_role(role, command):
if command in _roles[role]["commands"]:
_roles[role]["commands"].remove(command)
roles =_roles
- commit(_roles)
+ commit_roles(_roles)
return "command added to "+role
return "command not exists"
@@ -98,18 +147,19 @@ def change_role_tag(role, tag):
if role in _roles:
_roles[role]['tag'] = tag
roles = _roles
- commit(_roles)
+ commit_roles(_roles)
return "tag changed"
return "role not exists"
-def get_role(acc_id):
- global roles
- _roles = get_roles()
+def get_player_roles(acc_id):
+ _roles = get_roles()
+ roles=[]
for role in _roles:
if acc_id in _roles[role]["ids"]:
- return role
+ roles.append(role)
+ return roles
##### those ups done will clean it in future
@@ -137,6 +187,7 @@ def set_effect(effect, id):
def set_tag(tag, id):
+
global custom
_custom = get_custom()
_custom['customtag'][id] = tag
@@ -165,5 +216,12 @@ def commit_c():
with open(data_path+"custom.json",'w') as f:
json.dump(custom,f,indent=4)
-def update_toppers(toperlist):
- pass
+def update_toppers(topperlist):
+ global roles
+ _roles = get_roles()
+ if "top5" not in _roles:
+ create_role("top5")
+ roles["top5"]["ids"]=topperlist
+ commit_roles(roles)
+
+
diff --git a/dist/ba_root/mods/playersData/profiles.json b/dist/ba_root/mods/playersData/profiles.json
new file mode 100644
index 0000000..da3987d
--- /dev/null
+++ b/dist/ba_root/mods/playersData/profiles.json
@@ -0,0 +1,7 @@
+{
+ "pb-difsdf":{
+ "display_string":[],
+ "profiles":[],
+ "name":"something"
+ }
+}
\ No newline at end of file
diff --git a/dist/ba_root/mods/playersData/roles.json b/dist/ba_root/mods/playersData/roles.json
index 2c56c73..d2dcaf2 100644
--- a/dist/ba_root/mods/playersData/roles.json
+++ b/dist/ba_root/mods/playersData/roles.json
@@ -1,43 +1,46 @@
{
"owner": {
- "tag": "owner",
- "tagcolor": [1,0.6,0.4],
+ "tag": "\\cowner\\c", "tagcolor": [1,
+ 0.6,
+ 0.4
+ ],
"commands": ["ALL"],
- "ids": [
- "pb-IF48VWkBFQ",
- "pb-JiNJARBaXEFBVF9HFkNXXF1EF0ZaRlZE"
- ]
+ "ids": ["pb-IF48VWkBFQ", "pb-JiNJARBaXEFBVF9HFkNXXF1EF0ZaRlZE", "pb-IF4TVWwZUQ=="]
},
- "test69": {
- "tag": "test69",
- "tagcolor": [
- 1,
+ "admin": {
+ "tag": "\ue043admin\ue043", "tagcolor": [1,
1,
1
],
- "commands": [
- "mute"
- ],
- "ids": [
- "pb-IF4VAk4a"
- ]
+ "commands": ["createrole"],
+ "ids": ["pb-IF4TVWwZUQ=="]
},
- "test70": {
- "tag": "test70",
- "tagcolor": [
- 1,
+ "vip": {
+ "tag": "vip", "tagcolor": [1,
1,
1
],
"commands": [],
- "ids": [
- "pb-IF4VAk4a"
- ]
+ "ids": []
},
- "test71": {
- "tag": "test71",
- "tagcolor": [
+ "top5": {
+ "tag": "top5", "tagcolor": [1,
1,
+ 1
+ ],
+ "commands": [],
+ "ids": ["pb-IF4iVUc5Cg==", "pb-IF4TVWwZUQ==", "pb-IF42VUpaDg==", "pb-IF4LVU0KKQ==", "pb-IF4uVW8bMw=="]
+ },
+ "smoothy": {
+ "tag": "smoothy", "tagcolor": [1,
+ 1,
+ 1
+ ],
+ "commands": [],
+ "ids": []
+ },
+ "pros": {
+ "tag": "pros", "tagcolor": [1,
1,
1
],
diff --git a/dist/ba_root/mods/setting.json b/dist/ba_root/mods/setting.json
index 82ddd4a..662073b 100644
--- a/dist/ba_root/mods/setting.json
+++ b/dist/ba_root/mods/setting.json
@@ -1,6 +1,6 @@
{
"white_list": {
- "whitelist_on": true,
+ "whitelist_on": false,
"spectators": false,
"lobbychecktime": 1
},
diff --git a/dist/ba_root/mods/spazmod/__pycache__/effects.cpython-38.opt-1.pyc b/dist/ba_root/mods/spazmod/__pycache__/effects.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..0c7901e
Binary files /dev/null and b/dist/ba_root/mods/spazmod/__pycache__/effects.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/spazmod/__pycache__/modifyspaz.cpython-38.opt-1.pyc b/dist/ba_root/mods/spazmod/__pycache__/modifyspaz.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..b696a5d
Binary files /dev/null and b/dist/ba_root/mods/spazmod/__pycache__/modifyspaz.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/spazmod/__pycache__/tag.cpython-38.opt-1.pyc b/dist/ba_root/mods/spazmod/__pycache__/tag.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..61c4d6a
Binary files /dev/null and b/dist/ba_root/mods/spazmod/__pycache__/tag.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/spazmod/effects.py b/dist/ba_root/mods/spazmod/effects.py
index b668d42..10f3d36 100644
--- a/dist/ba_root/mods/spazmod/effects.py
+++ b/dist/ba_root/mods/spazmod/effects.py
@@ -21,6 +21,7 @@ from stats import mystats
from tools import globalvars as gvar
PlayerType = TypeVar('PlayerType', bound=ba.Player)
TeamType = TypeVar('TeamType', bound=ba.Team)
+from ba._enums import TimeType
tt = ba.TimeType.SIM
tf = ba.TimeFormat.MILLISECONDS
@@ -82,7 +83,7 @@ class SurroundBall(ba.Actor):
def initTimer(self, p):
self.node.position = self.getTargetPosition(p)
- self.surroundTimer = ba.Timer(30, ba.WeakCall(self.circleMove), repeat=True, timetype=tt, timeformat=tf)
+ self.surroundTimer = ba.Timer(30, self.circleMove, repeat=True, timetype=tt, timeformat=tf)
def circleMove(self):
spaz = self.spazRef()
@@ -140,7 +141,7 @@ class Effect(ba.Actor):
self.checkDeadTimer = None
self._hasDead = False
self.light = None
- flag = 0
+
node_id = self.source_player.node.playerID
cl_str = None
clID = None
@@ -149,60 +150,64 @@ class Effect(ba.Actor):
profiles = c.inputdevice.get_player_profiles()
clID = c.inputdevice.client_id
cl_str = c.get_account_id()
+
try:
if cl_str in custom_effects:
effect = custom_effects[cl_str]
+
if effect == 'ice':
- self.snowTimer = ba.Timer(500, ba.WeakCall(self.emitIce), repeat=True, timetype=tt, timeformat=tf)
- flag = 1
+
+ self.emitIce()
+ self.snowTimer = ba.Timer(0.5, self.emitIce, repeat=True, timetype=TimeType.SIM)
+ return
elif effect == 'sweat':
- self.smokeTimer = ba.Timer(40, ba.WeakCall(self.emitSmoke), repeat=True, timetype=tt, timeformat=tf)
- flag = 1
+ self.smokeTimer = ba.Timer(0.6, self.emitSmoke, repeat=True, timetype=TimeType.SIM)
+ return
elif effect == 'scorch':
- self.scorchTimer = ba.Timer(500, ba.WeakCall(self.update_Scorch), repeat=True, timetype=tt, timeformat=tf)
- flag = 1
+ self.scorchTimer = ba.Timer(500, self.update_Scorch, repeat=True, timetype=tt, timeformat=tf)
+ return
elif effect == 'glow':
self.addLightColor((1, 0.6, 0.4))
- self.checkDeadTimer = ba.Timer(150, ba.WeakCall(self.checkPlayerifDead), repeat=True, timetype=tt, timeformat=tf)
- flag = 1
+ self.checkDeadTimer = ba.Timer(150, self.checkPlayerifDead, repeat=True, timetype=tt, timeformat=tf)
+ return
elif effect == 'distortion':
- self.DistortionTimer = ba.Timer(1000, ba.WeakCall(self.emitDistortion), repeat=True, timetype=tt, timeformat=tf)
- flag = 1
+ self.DistortionTimer = ba.Timer(1000, self.emitDistortion, repeat=True, timetype=tt, timeformat=tf)
+ return
elif effect == 'slime':
- self.slimeTimer = ba.Timer(250, ba.WeakCall(self.emitSlime), repeat=True, timetype=tt, timeformat=tf)
- flag = 1
+ self.slimeTimer = ba.Timer(250, self.emitSlime, repeat=True, timetype=tt, timeformat=tf)
+ return
elif effect == 'metal':
- self.metalTimer = ba.Timer(500, ba.WeakCall(self.emitMetal), repeat=True, timetype=tt, timeformat=tf)
- flag = 1
+ self.metalTimer = ba.Timer(500, self.emitMetal, repeat=True, timetype=tt, timeformat=tf)
+ return
elif effect == 'surrounder':
self.surround = SurroundBall(spaz, shape="bones")
- flag = 1
+ return
elif effect == 'spark':
- self.sparkTimer = ba.Timer(100, ba.WeakCall(self.emitSpark), repeat=True, timetype=tt, timeformat=tf)
- flag = 1
+ self.sparkTimer = ba.Timer(100, self.emitSpark, repeat=True, timetype=tt, timeformat=tf)
+ return
except:
pass
if _settings['enablestats']:
pats = mystats.get_all_stats()
- if cl_str in pats:
+ if cl_str in pats and _settings['enableTop5effects']:
rank = pats[cl_str]["rank"]
if rank < 6:
if rank == 1:
- if flag == 0 and _settings['enableTop5effects']:
- self.surround = SurroundBall(spaz, shape="bones") #self.neroLightTimer = ba.Timer(500, ba.WeakCall(self.neonLightSwitch,("shine" in self.Decorations),("extra_Highlight" in self.Decorations),("extra_NameColor" in self.Decorations)),repeat = True, timetype=tt, timeformat=tf)
+
+ self.surround = SurroundBall(spaz, shape="bones") #self.neroLightTimer = ba.Timer(500, ba.WeakCall(self.neonLightSwitch,("shine" in self.Decorations),("extra_Highlight" in self.Decorations),("extra_NameColor" in self.Decorations)),repeat = True, timetype=tt, timeformat=tf)
elif rank == 2:
- if flag == 0 and _settings['enableTop5effects']:
- self.smokeTimer = ba.Timer(40, ba.WeakCall(self.emitSmoke), repeat=True, timetype=tt, timeformat=tf)
+
+ self.smokeTimer = ba.Timer(40, self.emitSmoke, repeat=True, timetype=tt, timeformat=tf)
elif rank == 3:
- if flag == 0 and _settings['enableTop5effects']:
- self.addLightColor((1, 0.6, 0.4));self.scorchTimer = ba.Timer(500, ba.WeakCall(self.update_Scorch), repeat=True, timetype=tt, timeformat=tf)
+
+ self.addLightColor((1, 0.6, 0.4));self.scorchTimer = ba.Timer(500, self.update_Scorch, repeat=True, timetype=tt, timeformat=tf)
elif rank == 4:
- if flag == 0 and _settings['enableTop5effects']:
- self.metalTimer = ba.Timer(500, ba.WeakCall(self.emitMetal), repeat=True, timetype=tt, timeformat=tf)
+
+ self.metalTimer = ba.Timer(500, self.emitMetal, repeat=True, timetype=tt, timeformat=tf)
else:
- if flag == 0 and _settings['enableTop5effects']:
- self.addLightColor((1, 0.6, 0.4));self.checkDeadTimer = ba.Timer(150, ba.WeakCall(self.checkPlayerifDead), repeat=True, timetype=tt, timeformat=tf)
+
+ self.addLightColor((1, 0.6, 0.4));self.checkDeadTimer = ba.Timer(150, self.checkPlayerifDead, repeat=True, timetype=tt, timeformat=tf)
if "smoke" and "spark" and "snowDrops" and "slimeDrops" and "metalDrops" and "Distortion" and "neroLight" and "scorch" and "HealTimer" and "KamikazeCheck" not in self.Decorations:
#self.checkDeadTimer = ba.Timer(150, ba.WeakCall(self.checkPlayerifDead), repeat=True, timetype=tt, timeformat=tf)
@@ -272,6 +277,7 @@ class Effect(ba.Actor):
def emitIce(self):
spaz = self.spazRef()
+
if spaz is None or not spaz.is_alive() or not spaz.node.exists():
self.handlemessage(ba.DieMessage())
return
diff --git a/dist/ba_root/mods/spazmod/tag.py b/dist/ba_root/mods/spazmod/tag.py
index d5c50a8..35bd3e3 100644
--- a/dist/ba_root/mods/spazmod/tag.py
+++ b/dist/ba_root/mods/spazmod/tag.py
@@ -2,20 +2,24 @@
from playersData import pdata
import ba, setting
def addtag(node,player):
- session_player=player.sessionplayer
- account_id=session_player.get_account_id()
- customtag_=pdata.get_custom()
- customtag=customtag_['customtag']
- roles=pdata.get_roles()
- role=pdata.get_role(account_id)
- tag=None
- if account_id in customtag:
- tag=customtag[account_id]
- elif role:
- tag=roles[role]['tag']
- if tag:
- Tag(node,tag)
-
+ session_player=player.sessionplayer
+ account_id=session_player.get_account_id()
+ customtag_=pdata.get_custom()
+ customtag=customtag_['customtag']
+ roles=pdata.get_roles()
+ p_roles=pdata.get_player_roles(account_id)
+ tag=None
+ if account_id in customtag:
+ tag=customtag[account_id]
+ elif p_roles !=[]:
+ for role in roles:
+
+ if role in p_roles:
+ tag=roles[role]['tag']
+ break;
+ if tag:
+ Tag(node,tag)
+
from stats import mystats
def addrank(node,player):
session_player=player.sessionplayer
@@ -42,6 +46,25 @@ class Tag(object):
'operation': 'add'
})
self.node.connectattr('torso_position', mnode, 'input2')
+ if '\\' in tag:
+
+ tag = tag.replace('\\d', ('\ue048'))
+ tag = tag.replace('\\c', ('\ue043'))
+ tag = tag.replace('\\h', ('\ue049'))
+ tag = tag.replace('\\s', ('\ue046'))
+ tag = tag.replace('\\n', ('\ue04b'))
+ tag = tag.replace('\\f', ('\ue04f'))
+ tag = tag.replace('\\g', ('\ue027'))
+ tag = tag.replace('\\i', ('\ue03a'))
+ tag = tag.replace('\\m', ('\ue04d'))
+ tag = tag.replace('\\t', ('\ue01f'))
+ tag = tag.replace('\\bs', ('\ue01e'))
+ tag = tag.replace('\\j', ('\ue010'))
+ tag = tag.replace('\\e', ('\ue045'))
+ tag = tag.replace('\\l', ('\ue047'))
+ tag = tag.replace('\\a', ('\ue020'))
+ tag = tag.replace('\\b', ('\ue00c'))
+
self.tag_text = ba.newnode('text',
owner=self.node,
attrs={
diff --git a/dist/ba_root/mods/stats/__pycache__/__init__.cpython-38.opt-1.pyc b/dist/ba_root/mods/stats/__pycache__/__init__.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..5985f55
Binary files /dev/null and b/dist/ba_root/mods/stats/__pycache__/__init__.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/stats/__pycache__/mystats.cpython-38.opt-1.pyc b/dist/ba_root/mods/stats/__pycache__/mystats.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..80d594c
Binary files /dev/null and b/dist/ba_root/mods/stats/__pycache__/mystats.cpython-38.opt-1.pyc differ
diff --git a/dist/ba_root/mods/stats/mystats.py b/dist/ba_root/mods/stats/mystats.py
index 8f03024..c9aaa39 100644
--- a/dist/ba_root/mods/stats/mystats.py
+++ b/dist/ba_root/mods/stats/mystats.py
@@ -65,7 +65,7 @@ def get_stats_by_id(ID: str):
if ID in a:
return a[ID]
else:
- print(f"'{ID}' not found in stats, returning None")
+
return None
def refreshStats():
@@ -237,7 +237,7 @@ class UpdateThread(threading.Thread):
# aaand that's it! There IS no step 27!
now = datetime.now()
update_time = now.strftime("%S:%M:%H - %d %b %y")
- print(f"Added {str(len(self._account_kills))} account's stats entries. || {str(update_time)}")
+ #print(f"Added {str(len(self._account_kills))} account's stats entries. || {str(update_time)}")
refreshStats()
def getRank(acc_id):
diff --git a/dist/ba_root/mods/stats/stats.json b/dist/ba_root/mods/stats/stats.json
index e0b50f0..24e7bd4 100644
--- a/dist/ba_root/mods/stats/stats.json
+++ b/dist/ba_root/mods/stats/stats.json
@@ -1,7 +1,7 @@
{
"pb-IF4VAk4a": {
- "rank": 1,
- "name_html": "pb-IF4VAk4a",
+ "rank": 15,
+ "name": "pb-IF4VAk4a",
"scores": 0,
"total_damage": 0.0,
"kills": 0,
@@ -10,5 +10,173 @@
"kd": 0.0,
"avg_score": 0.0,
"aid": "pb-IF4VAk4a"
+ },
+ "pb-IF4TVWwZUQ==": {
+ "rank": 2,
+ "name": "\ue030PC295588",
+ "scores": 470,
+ "total_damage": 0.0,
+ "kills": 1,
+ "deaths": 70,
+ "games": 42,
+ "kd": 0.014,
+ "avg_score": 11.19,
+ "aid": "pb-IF4TVWwZUQ=="
+ },
+ "pb-JiNJARBaXEFBVF9HFkNXXF1EF0ZaRlZE": {
+ "rank": 7,
+ "name": "\ue020HeySmoothy",
+ "scores": 208,
+ "total_damage": 0.0,
+ "kills": 8,
+ "deaths": 33,
+ "games": 9,
+ "kd": 0.242,
+ "avg_score": 23.111,
+ "aid": "pb-JiNJARBaXEFBVF9HFkNXXF1EF0ZaRlZE"
+ },
+ "pb-IF4uVW8bMw==": {
+ "rank": 5,
+ "name": "\ue020BurnedMedusa35646",
+ "scores": 396,
+ "total_damage": 0.0,
+ "kills": 31,
+ "deaths": 22,
+ "games": 6,
+ "kd": 1.409,
+ "avg_score": 66.0,
+ "aid": "pb-IF4uVW8bMw=="
+ },
+ "pb-IF4eVUooAw==": {
+ "rank": 10,
+ "name": "\ue030Android51971883",
+ "scores": 159,
+ "total_damage": 0.0,
+ "kills": 15,
+ "deaths": 21,
+ "games": 6,
+ "kd": 0.714,
+ "avg_score": 26.5,
+ "aid": "pb-IF4eVUooAw=="
+ },
+ "pb-IF4iVUc5Cg==": {
+ "rank": 1,
+ "name": "\ue020PicturesquePond10",
+ "scores": 851,
+ "total_damage": 0.0,
+ "kills": 51,
+ "deaths": 56,
+ "games": 10,
+ "kd": 0.91,
+ "avg_score": 85.1,
+ "aid": "pb-IF4iVUc5Cg=="
+ },
+ "pb-IF4rV0MoFA==": {
+ "rank": 8,
+ "name": "\ue030Android31875021",
+ "scores": 188,
+ "total_damage": 0.0,
+ "kills": 12,
+ "deaths": 14,
+ "games": 3,
+ "kd": 0.857,
+ "avg_score": 62.666,
+ "aid": "pb-IF4rV0MoFA=="
+ },
+ "pb-IF43VU0aVQ==": {
+ "rank": 6,
+ "name": "\ue020HARSHALgaming8992",
+ "scores": 253,
+ "total_damage": 0.0,
+ "kills": 12,
+ "deaths": 50,
+ "games": 6,
+ "kd": 0.24,
+ "avg_score": 42.166,
+ "aid": "pb-IF43VU0aVQ=="
+ },
+ "pb-IF4LVU0KKQ==": {
+ "rank": 4,
+ "name": "\ue020SHAMANT1111",
+ "scores": 434,
+ "total_damage": 0.0,
+ "kills": 28,
+ "deaths": 54,
+ "games": 6,
+ "kd": 0.518,
+ "avg_score": 72.333,
+ "aid": "pb-IF4LVU0KKQ=="
+ },
+ "pb-IF4VVUgJVw==": {
+ "rank": 9,
+ "name": "\ue020WobblyLining33",
+ "scores": 182,
+ "total_damage": 0.0,
+ "kills": 10,
+ "deaths": 27,
+ "games": 4,
+ "kd": 0.37,
+ "avg_score": 45.5,
+ "aid": "pb-IF4VVUgJVw=="
+ },
+ "pb-IF4gVU0BCg==": {
+ "rank": 14,
+ "name": "\ue020Dewanggaming",
+ "scores": 43,
+ "total_damage": 0.0,
+ "kills": 3,
+ "deaths": 9,
+ "games": 3,
+ "kd": 0.333,
+ "avg_score": 14.333,
+ "aid": "pb-IF4gVU0BCg=="
+ },
+ "pb-IF4FVXkZDQ==": {
+ "rank": 11,
+ "name": "\ue020lllBOLTlll",
+ "scores": 151,
+ "total_damage": 0.0,
+ "kills": 9,
+ "deaths": 15,
+ "games": 3,
+ "kd": 0.6,
+ "avg_score": 50.333,
+ "aid": "pb-IF4FVXkZDQ=="
+ },
+ "pb-IF42VUpaDg==": {
+ "rank": 3,
+ "name": "\ue020EasterlyArtillery29",
+ "scores": 468,
+ "total_damage": 0.0,
+ "kills": 25,
+ "deaths": 37,
+ "games": 4,
+ "kd": 0.675,
+ "avg_score": 117.0,
+ "aid": "pb-IF42VUpaDg=="
+ },
+ "pb-IF4iVUNSIw==": {
+ "rank": 13,
+ "name": "default",
+ "scores": 56,
+ "total_damage": 0.0,
+ "kills": 2,
+ "deaths": 5,
+ "games": 1,
+ "kd": 0.4,
+ "avg_score": 56.0,
+ "aid": "pb-IF4iVUNSIw=="
+ },
+ "pb-IF4PVGcSJw==": {
+ "rank": 12,
+ "name": "\ue030Android29104233",
+ "scores": 84,
+ "total_damage": 0.0,
+ "kills": 7,
+ "deaths": 9,
+ "games": 2,
+ "kd": 0.777,
+ "avg_score": 42.0,
+ "aid": "pb-IF4PVGcSJw=="
}
}
\ No newline at end of file
diff --git a/dist/ba_root/mods/stats/stats_page.html b/dist/ba_root/mods/stats/stats_page.html
new file mode 100644
index 0000000..cddc603
--- /dev/null
+++ b/dist/ba_root/mods/stats/stats_page.html
@@ -0,0 +1,142 @@
+
+
+
+ A problem occurred in a Python script. Here is the sequence of
+function calls leading up to the error, in the order they occurred.
'''
+
+ indent = '":
+ call += inspect.formatargvalues(args, varargs, varkw, locals,
+ formatvalue=lambda value: '=' + pydoc.html.repr(value))
+
+ highlight = {}
+ def reader(lnum=[lnum]):
+ highlight[lnum[0]] = 1
+ try: return linecache.getline(file, lnum[0])
+ finally: lnum[0] += 1
+ vars = scanvars(reader, frame, locals)
+
+ rows = ['%s%s %s ' %
+ (' ', link, call)]
+ if index is not None:
+ i = lnum - index
+ for line in lines:
+ num = small(' ' * (5-len(str(i))) + str(i)) + ' '
+ if i in highlight:
+ line = '=>%s%s ' % (num, pydoc.html.preformat(line))
+ rows.append('%s ' % line)
+ else:
+ line = ' %s%s ' % (num, pydoc.html.preformat(line))
+ rows.append('%s ' % grey(line))
+ i += 1
+
+ done, dump = {}, []
+ for name, where, value in vars:
+ if name in done: continue
+ done[name] = 1
+ if value is not __UNDEF__:
+ if where in ('global', 'builtin'):
+ name = ('%s ' % where) + strong(name)
+ elif where == 'local':
+ name = strong(name)
+ else:
+ name = where + strong(name.split('.')[-1])
+ dump.append('%s = %s' % (name, pydoc.html.repr(value)))
+ else:
+ dump.append(name + ' undefined ')
+
+ rows.append('%s ' % small(grey(', '.join(dump))))
+ frames.append('''
+''' % '\n'.join(rows))
+
+ exception = ['%s: %s' % (strong(pydoc.html.escape(str(etype))),
+ pydoc.html.escape(str(evalue)))]
+ for name in dir(evalue):
+ if name[:1] == '_': continue
+ value = pydoc.html.repr(getattr(evalue, name))
+ exception.append('\n %s%s =\n%s' % (indent, name, value))
+
+ return head + ''.join(frames) + ''.join(exception) + '''
+
+
+
+''' % pydoc.html.escape(
+ ''.join(traceback.format_exception(etype, evalue, etb)))
+
+def text(einfo, context=5):
+ """Return a plain text document describing a given traceback."""
+ etype, evalue, etb = einfo
+ if isinstance(etype, type):
+ etype = etype.__name__
+ pyver = 'Python ' + sys.version.split()[0] + ': ' + sys.executable
+ date = time.ctime(time.time())
+ head = "%s\n%s\n%s\n" % (str(etype), pyver, date) + '''
+A problem occurred in a Python script. Here is the sequence of
+function calls leading up to the error, in the order they occurred.
+'''
+
+ frames = []
+ records = inspect.getinnerframes(etb, context)
+ for frame, file, lnum, func, lines, index in records:
+ file = file and os.path.abspath(file) or '?'
+ args, varargs, varkw, locals = inspect.getargvalues(frame)
+ call = ''
+ if func != '?':
+ call = 'in ' + func
+ if func != "":
+ call += inspect.formatargvalues(args, varargs, varkw, locals,
+ formatvalue=lambda value: '=' + pydoc.text.repr(value))
+
+ highlight = {}
+ def reader(lnum=[lnum]):
+ highlight[lnum[0]] = 1
+ try: return linecache.getline(file, lnum[0])
+ finally: lnum[0] += 1
+ vars = scanvars(reader, frame, locals)
+
+ rows = [' %s %s' % (file, call)]
+ if index is not None:
+ i = lnum - index
+ for line in lines:
+ num = '%5d ' % i
+ rows.append(num+line.rstrip())
+ i += 1
+
+ done, dump = {}, []
+ for name, where, value in vars:
+ if name in done: continue
+ done[name] = 1
+ if value is not __UNDEF__:
+ if where == 'global': name = 'global ' + name
+ elif where != 'local': name = where + name.split('.')[-1]
+ dump.append('%s = %s' % (name, pydoc.text.repr(value)))
+ else:
+ dump.append(name + ' undefined')
+
+ rows.append('\n'.join(dump))
+ frames.append('\n%s\n' % '\n'.join(rows))
+
+ exception = ['%s: %s' % (str(etype), str(evalue))]
+ for name in dir(evalue):
+ value = pydoc.text.repr(getattr(evalue, name))
+ exception.append('\n%s%s = %s' % (" "*4, name, value))
+
+ return head + ''.join(frames) + ''.join(exception) + '''
+
+The above is a description of an error in a Python program. Here is
+the original traceback:
+
+%s
+''' % ''.join(traceback.format_exception(etype, evalue, etb))
+
+class Hook:
+ """A hook to replace sys.excepthook that shows tracebacks in HTML."""
+
+ def __init__(self, display=1, logdir=None, context=5, file=None,
+ format="html"):
+ self.display = display # send tracebacks to browser if true
+ self.logdir = logdir # log tracebacks to files if not None
+ self.context = context # number of source code lines per frame
+ self.file = file or sys.stdout # place to send the output
+ self.format = format
+
+ def __call__(self, etype, evalue, etb):
+ self.handle((etype, evalue, etb))
+
+ def handle(self, info=None):
+ info = info or sys.exc_info()
+ if self.format == "html":
+ self.file.write(reset())
+
+ formatter = (self.format=="html") and html or text
+ plain = False
+ try:
+ doc = formatter(info, self.context)
+ except: # just in case something goes wrong
+ doc = ''.join(traceback.format_exception(*info))
+ plain = True
+
+ if self.display:
+ if plain:
+ doc = pydoc.html.escape(doc)
+ self.file.write('' + doc + ' \n')
+ else:
+ self.file.write(doc + '\n')
+ else:
+ self.file.write('A problem occurred in a Python script.\n')
+
+ if self.logdir is not None:
+ suffix = ['.txt', '.html'][self.format=="html"]
+ (fd, path) = tempfile.mkstemp(suffix=suffix, dir=self.logdir)
+
+ try:
+ with os.fdopen(fd, 'w') as file:
+ file.write(doc)
+ msg = '%s contains the description of this error.' % path
+ except:
+ msg = 'Tried to save traceback to %s, but failed.' % path
+
+ if self.format == 'html':
+ self.file.write('
%s
\n' % msg)
+ else:
+ self.file.write(msg + '\n')
+ try:
+ self.file.flush()
+ except: pass
+
+handler = Hook().handle
+def enable(display=1, logdir=None, context=5, format="html"):
+ """Install an exception handler that formats tracebacks as HTML.
+
+ The optional argument 'display' can be set to 0 to suppress sending the
+ traceback to the browser, and 'logdir' can be set to a directory to cause
+ tracebacks to be written to files there."""
+ sys.excepthook = Hook(display=display, logdir=logdir,
+ context=context, format=format)
diff --git a/dist/lib/chunk.py b/dist/lib/chunk.py
new file mode 100644
index 0000000..870c39f
--- /dev/null
+++ b/dist/lib/chunk.py
@@ -0,0 +1,169 @@
+"""Simple class to read IFF chunks.
+
+An IFF chunk (used in formats such as AIFF, TIFF, RMFF (RealMedia File
+Format)) has the following structure:
+
++----------------+
+| ID (4 bytes) |
++----------------+
+| size (4 bytes) |
++----------------+
+| data |
+| ... |
++----------------+
+
+The ID is a 4-byte string which identifies the type of chunk.
+
+The size field (a 32-bit value, encoded using big-endian byte order)
+gives the size of the whole chunk, including the 8-byte header.
+
+Usually an IFF-type file consists of one or more chunks. The proposed
+usage of the Chunk class defined here is to instantiate an instance at
+the start of each chunk and read from the instance until it reaches
+the end, after which a new instance can be instantiated. At the end
+of the file, creating a new instance will fail with an EOFError
+exception.
+
+Usage:
+while True:
+ try:
+ chunk = Chunk(file)
+ except EOFError:
+ break
+ chunktype = chunk.getname()
+ while True:
+ data = chunk.read(nbytes)
+ if not data:
+ pass
+ # do something with data
+
+The interface is file-like. The implemented methods are:
+read, close, seek, tell, isatty.
+Extra methods are: skip() (called by close, skips to the end of the chunk),
+getname() (returns the name (ID) of the chunk)
+
+The __init__ method has one required argument, a file-like object
+(including a chunk instance), and one optional argument, a flag which
+specifies whether or not chunks are aligned on 2-byte boundaries. The
+default is 1, i.e. aligned.
+"""
+
+class Chunk:
+ def __init__(self, file, align=True, bigendian=True, inclheader=False):
+ import struct
+ self.closed = False
+ self.align = align # whether to align to word (2-byte) boundaries
+ if bigendian:
+ strflag = '>'
+ else:
+ strflag = '<'
+ self.file = file
+ self.chunkname = file.read(4)
+ if len(self.chunkname) < 4:
+ raise EOFError
+ try:
+ self.chunksize = struct.unpack_from(strflag+'L', file.read(4))[0]
+ except struct.error:
+ raise EOFError from None
+ if inclheader:
+ self.chunksize = self.chunksize - 8 # subtract header
+ self.size_read = 0
+ try:
+ self.offset = self.file.tell()
+ except (AttributeError, OSError):
+ self.seekable = False
+ else:
+ self.seekable = True
+
+ def getname(self):
+ """Return the name (ID) of the current chunk."""
+ return self.chunkname
+
+ def getsize(self):
+ """Return the size of the current chunk."""
+ return self.chunksize
+
+ def close(self):
+ if not self.closed:
+ try:
+ self.skip()
+ finally:
+ self.closed = True
+
+ def isatty(self):
+ if self.closed:
+ raise ValueError("I/O operation on closed file")
+ return False
+
+ def seek(self, pos, whence=0):
+ """Seek to specified position into the chunk.
+ Default position is 0 (start of chunk).
+ If the file is not seekable, this will result in an error.
+ """
+
+ if self.closed:
+ raise ValueError("I/O operation on closed file")
+ if not self.seekable:
+ raise OSError("cannot seek")
+ if whence == 1:
+ pos = pos + self.size_read
+ elif whence == 2:
+ pos = pos + self.chunksize
+ if pos < 0 or pos > self.chunksize:
+ raise RuntimeError
+ self.file.seek(self.offset + pos, 0)
+ self.size_read = pos
+
+ def tell(self):
+ if self.closed:
+ raise ValueError("I/O operation on closed file")
+ return self.size_read
+
+ def read(self, size=-1):
+ """Read at most size bytes from the chunk.
+ If size is omitted or negative, read until the end
+ of the chunk.
+ """
+
+ if self.closed:
+ raise ValueError("I/O operation on closed file")
+ if self.size_read >= self.chunksize:
+ return b''
+ if size < 0:
+ size = self.chunksize - self.size_read
+ if size > self.chunksize - self.size_read:
+ size = self.chunksize - self.size_read
+ data = self.file.read(size)
+ self.size_read = self.size_read + len(data)
+ if self.size_read == self.chunksize and \
+ self.align and \
+ (self.chunksize & 1):
+ dummy = self.file.read(1)
+ self.size_read = self.size_read + len(dummy)
+ return data
+
+ def skip(self):
+ """Skip the rest of the chunk.
+ If you are not interested in the contents of the chunk,
+ this method should be called so that the file points to
+ the start of the next chunk.
+ """
+
+ if self.closed:
+ raise ValueError("I/O operation on closed file")
+ if self.seekable:
+ try:
+ n = self.chunksize - self.size_read
+ # maybe fix alignment
+ if self.align and (self.chunksize & 1):
+ n = n + 1
+ self.file.seek(n, 1)
+ self.size_read = self.size_read + n
+ return
+ except OSError:
+ pass
+ while self.size_read < self.chunksize:
+ n = min(8192, self.chunksize - self.size_read)
+ dummy = self.read(n)
+ if not dummy:
+ raise EOFError
diff --git a/dist/lib/cmd.py b/dist/lib/cmd.py
new file mode 100644
index 0000000..859e910
--- /dev/null
+++ b/dist/lib/cmd.py
@@ -0,0 +1,401 @@
+"""A generic class to build line-oriented command interpreters.
+
+Interpreters constructed with this class obey the following conventions:
+
+1. End of file on input is processed as the command 'EOF'.
+2. A command is parsed out of each line by collecting the prefix composed
+ of characters in the identchars member.
+3. A command `foo' is dispatched to a method 'do_foo()'; the do_ method
+ is passed a single argument consisting of the remainder of the line.
+4. Typing an empty line repeats the last command. (Actually, it calls the
+ method `emptyline', which may be overridden in a subclass.)
+5. There is a predefined `help' method. Given an argument `topic', it
+ calls the command `help_topic'. With no arguments, it lists all topics
+ with defined help_ functions, broken into up to three topics; documented
+ commands, miscellaneous help topics, and undocumented commands.
+6. The command '?' is a synonym for `help'. The command '!' is a synonym
+ for `shell', if a do_shell method exists.
+7. If completion is enabled, completing commands will be done automatically,
+ and completing of commands args is done by calling complete_foo() with
+ arguments text, line, begidx, endidx. text is string we are matching
+ against, all returned matches must begin with it. line is the current
+ input line (lstripped), begidx and endidx are the beginning and end
+ indexes of the text being matched, which could be used to provide
+ different completion depending upon which position the argument is in.
+
+The `default' method may be overridden to intercept commands for which there
+is no do_ method.
+
+The `completedefault' method may be overridden to intercept completions for
+commands that have no complete_ method.
+
+The data member `self.ruler' sets the character used to draw separator lines
+in the help messages. If empty, no ruler line is drawn. It defaults to "=".
+
+If the value of `self.intro' is nonempty when the cmdloop method is called,
+it is printed out on interpreter startup. This value may be overridden
+via an optional argument to the cmdloop() method.
+
+The data members `self.doc_header', `self.misc_header', and
+`self.undoc_header' set the headers used for the help function's
+listings of documented functions, miscellaneous topics, and undocumented
+functions respectively.
+"""
+
+import string, sys
+
+__all__ = ["Cmd"]
+
+PROMPT = '(Cmd) '
+IDENTCHARS = string.ascii_letters + string.digits + '_'
+
+class Cmd:
+ """A simple framework for writing line-oriented command interpreters.
+
+ These are often useful for test harnesses, administrative tools, and
+ prototypes that will later be wrapped in a more sophisticated interface.
+
+ A Cmd instance or subclass instance is a line-oriented interpreter
+ framework. There is no good reason to instantiate Cmd itself; rather,
+ it's useful as a superclass of an interpreter class you define yourself
+ in order to inherit Cmd's methods and encapsulate action methods.
+
+ """
+ prompt = PROMPT
+ identchars = IDENTCHARS
+ ruler = '='
+ lastcmd = ''
+ intro = None
+ doc_leader = ""
+ doc_header = "Documented commands (type help ):"
+ misc_header = "Miscellaneous help topics:"
+ undoc_header = "Undocumented commands:"
+ nohelp = "*** No help on %s"
+ use_rawinput = 1
+
+ def __init__(self, completekey='tab', stdin=None, stdout=None):
+ """Instantiate a line-oriented interpreter framework.
+
+ The optional argument 'completekey' is the readline name of a
+ completion key; it defaults to the Tab key. If completekey is
+ not None and the readline module is available, command completion
+ is done automatically. The optional arguments stdin and stdout
+ specify alternate input and output file objects; if not specified,
+ sys.stdin and sys.stdout are used.
+
+ """
+ if stdin is not None:
+ self.stdin = stdin
+ else:
+ self.stdin = sys.stdin
+ if stdout is not None:
+ self.stdout = stdout
+ else:
+ self.stdout = sys.stdout
+ self.cmdqueue = []
+ self.completekey = completekey
+
+ def cmdloop(self, intro=None):
+ """Repeatedly issue a prompt, accept input, parse an initial prefix
+ off the received input, and dispatch to action methods, passing them
+ the remainder of the line as argument.
+
+ """
+
+ self.preloop()
+ if self.use_rawinput and self.completekey:
+ try:
+ import readline
+ self.old_completer = readline.get_completer()
+ readline.set_completer(self.complete)
+ readline.parse_and_bind(self.completekey+": complete")
+ except ImportError:
+ pass
+ try:
+ if intro is not None:
+ self.intro = intro
+ if self.intro:
+ self.stdout.write(str(self.intro)+"\n")
+ stop = None
+ while not stop:
+ if self.cmdqueue:
+ line = self.cmdqueue.pop(0)
+ else:
+ if self.use_rawinput:
+ try:
+ line = input(self.prompt)
+ except EOFError:
+ line = 'EOF'
+ else:
+ self.stdout.write(self.prompt)
+ self.stdout.flush()
+ line = self.stdin.readline()
+ if not len(line):
+ line = 'EOF'
+ else:
+ line = line.rstrip('\r\n')
+ line = self.precmd(line)
+ stop = self.onecmd(line)
+ stop = self.postcmd(stop, line)
+ self.postloop()
+ finally:
+ if self.use_rawinput and self.completekey:
+ try:
+ import readline
+ readline.set_completer(self.old_completer)
+ except ImportError:
+ pass
+
+
+ def precmd(self, line):
+ """Hook method executed just before the command line is
+ interpreted, but after the input prompt is generated and issued.
+
+ """
+ return line
+
+ def postcmd(self, stop, line):
+ """Hook method executed just after a command dispatch is finished."""
+ return stop
+
+ def preloop(self):
+ """Hook method executed once when the cmdloop() method is called."""
+ pass
+
+ def postloop(self):
+ """Hook method executed once when the cmdloop() method is about to
+ return.
+
+ """
+ pass
+
+ def parseline(self, line):
+ """Parse the line into a command name and a string containing
+ the arguments. Returns a tuple containing (command, args, line).
+ 'command' and 'args' may be None if the line couldn't be parsed.
+ """
+ line = line.strip()
+ if not line:
+ return None, None, line
+ elif line[0] == '?':
+ line = 'help ' + line[1:]
+ elif line[0] == '!':
+ if hasattr(self, 'do_shell'):
+ line = 'shell ' + line[1:]
+ else:
+ return None, None, line
+ i, n = 0, len(line)
+ while i < n and line[i] in self.identchars: i = i+1
+ cmd, arg = line[:i], line[i:].strip()
+ return cmd, arg, line
+
+ def onecmd(self, line):
+ """Interpret the argument as though it had been typed in response
+ to the prompt.
+
+ This may be overridden, but should not normally need to be;
+ see the precmd() and postcmd() methods for useful execution hooks.
+ The return value is a flag indicating whether interpretation of
+ commands by the interpreter should stop.
+
+ """
+ cmd, arg, line = self.parseline(line)
+ if not line:
+ return self.emptyline()
+ if cmd is None:
+ return self.default(line)
+ self.lastcmd = line
+ if line == 'EOF' :
+ self.lastcmd = ''
+ if cmd == '':
+ return self.default(line)
+ else:
+ try:
+ func = getattr(self, 'do_' + cmd)
+ except AttributeError:
+ return self.default(line)
+ return func(arg)
+
+ def emptyline(self):
+ """Called when an empty line is entered in response to the prompt.
+
+ If this method is not overridden, it repeats the last nonempty
+ command entered.
+
+ """
+ if self.lastcmd:
+ return self.onecmd(self.lastcmd)
+
+ def default(self, line):
+ """Called on an input line when the command prefix is not recognized.
+
+ If this method is not overridden, it prints an error message and
+ returns.
+
+ """
+ self.stdout.write('*** Unknown syntax: %s\n'%line)
+
+ def completedefault(self, *ignored):
+ """Method called to complete an input line when no command-specific
+ complete_*() method is available.
+
+ By default, it returns an empty list.
+
+ """
+ return []
+
+ def completenames(self, text, *ignored):
+ dotext = 'do_'+text
+ return [a[3:] for a in self.get_names() if a.startswith(dotext)]
+
+ def complete(self, text, state):
+ """Return the next possible completion for 'text'.
+
+ If a command has not been entered, then complete against command list.
+ Otherwise try to call complete_ to get list of completions.
+ """
+ if state == 0:
+ import readline
+ origline = readline.get_line_buffer()
+ line = origline.lstrip()
+ stripped = len(origline) - len(line)
+ begidx = readline.get_begidx() - stripped
+ endidx = readline.get_endidx() - stripped
+ if begidx>0:
+ cmd, args, foo = self.parseline(line)
+ if cmd == '':
+ compfunc = self.completedefault
+ else:
+ try:
+ compfunc = getattr(self, 'complete_' + cmd)
+ except AttributeError:
+ compfunc = self.completedefault
+ else:
+ compfunc = self.completenames
+ self.completion_matches = compfunc(text, line, begidx, endidx)
+ try:
+ return self.completion_matches[state]
+ except IndexError:
+ return None
+
+ def get_names(self):
+ # This method used to pull in base class attributes
+ # at a time dir() didn't do it yet.
+ return dir(self.__class__)
+
+ def complete_help(self, *args):
+ commands = set(self.completenames(*args))
+ topics = set(a[5:] for a in self.get_names()
+ if a.startswith('help_' + args[0]))
+ return list(commands | topics)
+
+ def do_help(self, arg):
+ 'List available commands with "help" or detailed help with "help cmd".'
+ if arg:
+ # XXX check arg syntax
+ try:
+ func = getattr(self, 'help_' + arg)
+ except AttributeError:
+ try:
+ doc=getattr(self, 'do_' + arg).__doc__
+ if doc:
+ self.stdout.write("%s\n"%str(doc))
+ return
+ except AttributeError:
+ pass
+ self.stdout.write("%s\n"%str(self.nohelp % (arg,)))
+ return
+ func()
+ else:
+ names = self.get_names()
+ cmds_doc = []
+ cmds_undoc = []
+ help = {}
+ for name in names:
+ if name[:5] == 'help_':
+ help[name[5:]]=1
+ names.sort()
+ # There can be duplicates if routines overridden
+ prevname = ''
+ for name in names:
+ if name[:3] == 'do_':
+ if name == prevname:
+ continue
+ prevname = name
+ cmd=name[3:]
+ if cmd in help:
+ cmds_doc.append(cmd)
+ del help[cmd]
+ elif getattr(self, name).__doc__:
+ cmds_doc.append(cmd)
+ else:
+ cmds_undoc.append(cmd)
+ self.stdout.write("%s\n"%str(self.doc_leader))
+ self.print_topics(self.doc_header, cmds_doc, 15,80)
+ self.print_topics(self.misc_header, list(help.keys()),15,80)
+ self.print_topics(self.undoc_header, cmds_undoc, 15,80)
+
+ def print_topics(self, header, cmds, cmdlen, maxcol):
+ if cmds:
+ self.stdout.write("%s\n"%str(header))
+ if self.ruler:
+ self.stdout.write("%s\n"%str(self.ruler * len(header)))
+ self.columnize(cmds, maxcol-1)
+ self.stdout.write("\n")
+
+ def columnize(self, list, displaywidth=80):
+ """Display a list of strings as a compact set of columns.
+
+ Each column is only as wide as necessary.
+ Columns are separated by two spaces (one was not legible enough).
+ """
+ if not list:
+ self.stdout.write("\n")
+ return
+
+ nonstrings = [i for i in range(len(list))
+ if not isinstance(list[i], str)]
+ if nonstrings:
+ raise TypeError("list[i] not a string for i in %s"
+ % ", ".join(map(str, nonstrings)))
+ size = len(list)
+ if size == 1:
+ self.stdout.write('%s\n'%str(list[0]))
+ return
+ # Try every row count from 1 upwards
+ for nrows in range(1, len(list)):
+ ncols = (size+nrows-1) // nrows
+ colwidths = []
+ totwidth = -2
+ for col in range(ncols):
+ colwidth = 0
+ for row in range(nrows):
+ i = row + nrows*col
+ if i >= size:
+ break
+ x = list[i]
+ colwidth = max(colwidth, len(x))
+ colwidths.append(colwidth)
+ totwidth += colwidth + 2
+ if totwidth > displaywidth:
+ break
+ if totwidth <= displaywidth:
+ break
+ else:
+ nrows = len(list)
+ ncols = 1
+ colwidths = [0]
+ for row in range(nrows):
+ texts = []
+ for col in range(ncols):
+ i = row + nrows*col
+ if i >= size:
+ x = ""
+ else:
+ x = list[i]
+ texts.append(x)
+ while texts and not texts[-1]:
+ del texts[-1]
+ for col in range(len(texts)):
+ texts[col] = texts[col].ljust(colwidths[col])
+ self.stdout.write("%s\n"%str(" ".join(texts)))
diff --git a/dist/lib/code.py b/dist/lib/code.py
new file mode 100644
index 0000000..76000f8
--- /dev/null
+++ b/dist/lib/code.py
@@ -0,0 +1,315 @@
+"""Utilities needed to emulate Python's interactive interpreter.
+
+"""
+
+# Inspired by similar code by Jeff Epler and Fredrik Lundh.
+
+
+import sys
+import traceback
+from codeop import CommandCompiler, compile_command
+
+__all__ = ["InteractiveInterpreter", "InteractiveConsole", "interact",
+ "compile_command"]
+
+class InteractiveInterpreter:
+ """Base class for InteractiveConsole.
+
+ This class deals with parsing and interpreter state (the user's
+ namespace); it doesn't deal with input buffering or prompting or
+ input file naming (the filename is always passed in explicitly).
+
+ """
+
+ def __init__(self, locals=None):
+ """Constructor.
+
+ The optional 'locals' argument specifies the dictionary in
+ which code will be executed; it defaults to a newly created
+ dictionary with key "__name__" set to "__console__" and key
+ "__doc__" set to None.
+
+ """
+ if locals is None:
+ locals = {"__name__": "__console__", "__doc__": None}
+ self.locals = locals
+ self.compile = CommandCompiler()
+
+ def runsource(self, source, filename=" ", symbol="single"):
+ """Compile and run some source in the interpreter.
+
+ Arguments are as for compile_command().
+
+ One of several things can happen:
+
+ 1) The input is incorrect; compile_command() raised an
+ exception (SyntaxError or OverflowError). A syntax traceback
+ will be printed by calling the showsyntaxerror() method.
+
+ 2) The input is incomplete, and more input is required;
+ compile_command() returned None. Nothing happens.
+
+ 3) The input is complete; compile_command() returned a code
+ object. The code is executed by calling self.runcode() (which
+ also handles run-time exceptions, except for SystemExit).
+
+ The return value is True in case 2, False in the other cases (unless
+ an exception is raised). The return value can be used to
+ decide whether to use sys.ps1 or sys.ps2 to prompt the next
+ line.
+
+ """
+ try:
+ code = self.compile(source, filename, symbol)
+ except (OverflowError, SyntaxError, ValueError):
+ # Case 1
+ self.showsyntaxerror(filename)
+ return False
+
+ if code is None:
+ # Case 2
+ return True
+
+ # Case 3
+ self.runcode(code)
+ return False
+
+ def runcode(self, code):
+ """Execute a code object.
+
+ When an exception occurs, self.showtraceback() is called to
+ display a traceback. All exceptions are caught except
+ SystemExit, which is reraised.
+
+ A note about KeyboardInterrupt: this exception may occur
+ elsewhere in this code, and may not always be caught. The
+ caller should be prepared to deal with it.
+
+ """
+ try:
+ exec(code, self.locals)
+ except SystemExit:
+ raise
+ except:
+ self.showtraceback()
+
+ def showsyntaxerror(self, filename=None):
+ """Display the syntax error that just occurred.
+
+ This doesn't display a stack trace because there isn't one.
+
+ If a filename is given, it is stuffed in the exception instead
+ of what was there before (because Python's parser always uses
+ "" when reading from a string).
+
+ The output is written by self.write(), below.
+
+ """
+ type, value, tb = sys.exc_info()
+ sys.last_type = type
+ sys.last_value = value
+ sys.last_traceback = tb
+ if filename and type is SyntaxError:
+ # Work hard to stuff the correct filename in the exception
+ try:
+ msg, (dummy_filename, lineno, offset, line) = value.args
+ except ValueError:
+ # Not the format we expect; leave it alone
+ pass
+ else:
+ # Stuff in the right filename
+ value = SyntaxError(msg, (filename, lineno, offset, line))
+ sys.last_value = value
+ if sys.excepthook is sys.__excepthook__:
+ lines = traceback.format_exception_only(type, value)
+ self.write(''.join(lines))
+ else:
+ # If someone has set sys.excepthook, we let that take precedence
+ # over self.write
+ sys.excepthook(type, value, tb)
+
+ def showtraceback(self):
+ """Display the exception that just occurred.
+
+ We remove the first stack item because it is our own code.
+
+ The output is written by self.write(), below.
+
+ """
+ sys.last_type, sys.last_value, last_tb = ei = sys.exc_info()
+ sys.last_traceback = last_tb
+ try:
+ lines = traceback.format_exception(ei[0], ei[1], last_tb.tb_next)
+ if sys.excepthook is sys.__excepthook__:
+ self.write(''.join(lines))
+ else:
+ # If someone has set sys.excepthook, we let that take precedence
+ # over self.write
+ sys.excepthook(ei[0], ei[1], last_tb)
+ finally:
+ last_tb = ei = None
+
+ def write(self, data):
+ """Write a string.
+
+ The base implementation writes to sys.stderr; a subclass may
+ replace this with a different implementation.
+
+ """
+ sys.stderr.write(data)
+
+
+class InteractiveConsole(InteractiveInterpreter):
+ """Closely emulate the behavior of the interactive Python interpreter.
+
+ This class builds on InteractiveInterpreter and adds prompting
+ using the familiar sys.ps1 and sys.ps2, and input buffering.
+
+ """
+
+ def __init__(self, locals=None, filename=""):
+ """Constructor.
+
+ The optional locals argument will be passed to the
+ InteractiveInterpreter base class.
+
+ The optional filename argument should specify the (file)name
+ of the input stream; it will show up in tracebacks.
+
+ """
+ InteractiveInterpreter.__init__(self, locals)
+ self.filename = filename
+ self.resetbuffer()
+
+ def resetbuffer(self):
+ """Reset the input buffer."""
+ self.buffer = []
+
+ def interact(self, banner=None, exitmsg=None):
+ """Closely emulate the interactive Python console.
+
+ The optional banner argument specifies the banner to print
+ before the first interaction; by default it prints a banner
+ similar to the one printed by the real Python interpreter,
+ followed by the current class name in parentheses (so as not
+ to confuse this with the real interpreter -- since it's so
+ close!).
+
+ The optional exitmsg argument specifies the exit message
+ printed when exiting. Pass the empty string to suppress
+ printing an exit message. If exitmsg is not given or None,
+ a default message is printed.
+
+ """
+ try:
+ sys.ps1
+ except AttributeError:
+ sys.ps1 = ">>> "
+ try:
+ sys.ps2
+ except AttributeError:
+ sys.ps2 = "... "
+ cprt = 'Type "help", "copyright", "credits" or "license" for more information.'
+ if banner is None:
+ self.write("Python %s on %s\n%s\n(%s)\n" %
+ (sys.version, sys.platform, cprt,
+ self.__class__.__name__))
+ elif banner:
+ self.write("%s\n" % str(banner))
+ more = 0
+ while 1:
+ try:
+ if more:
+ prompt = sys.ps2
+ else:
+ prompt = sys.ps1
+ try:
+ line = self.raw_input(prompt)
+ except EOFError:
+ self.write("\n")
+ break
+ else:
+ more = self.push(line)
+ except KeyboardInterrupt:
+ self.write("\nKeyboardInterrupt\n")
+ self.resetbuffer()
+ more = 0
+ if exitmsg is None:
+ self.write('now exiting %s...\n' % self.__class__.__name__)
+ elif exitmsg != '':
+ self.write('%s\n' % exitmsg)
+
+ def push(self, line):
+ """Push a line to the interpreter.
+
+ The line should not have a trailing newline; it may have
+ internal newlines. The line is appended to a buffer and the
+ interpreter's runsource() method is called with the
+ concatenated contents of the buffer as source. If this
+ indicates that the command was executed or invalid, the buffer
+ is reset; otherwise, the command is incomplete, and the buffer
+ is left as it was after the line was appended. The return
+ value is 1 if more input is required, 0 if the line was dealt
+ with in some way (this is the same as runsource()).
+
+ """
+ self.buffer.append(line)
+ source = "\n".join(self.buffer)
+ more = self.runsource(source, self.filename)
+ if not more:
+ self.resetbuffer()
+ return more
+
+ def raw_input(self, prompt=""):
+ """Write a prompt and read a line.
+
+ The returned line does not include the trailing newline.
+ When the user enters the EOF key sequence, EOFError is raised.
+
+ The base implementation uses the built-in function
+ input(); a subclass may replace this with a different
+ implementation.
+
+ """
+ return input(prompt)
+
+
+
+def interact(banner=None, readfunc=None, local=None, exitmsg=None):
+ """Closely emulate the interactive Python interpreter.
+
+ This is a backwards compatible interface to the InteractiveConsole
+ class. When readfunc is not specified, it attempts to import the
+ readline module to enable GNU readline if it is available.
+
+ Arguments (all optional, all default to None):
+
+ banner -- passed to InteractiveConsole.interact()
+ readfunc -- if not None, replaces InteractiveConsole.raw_input()
+ local -- passed to InteractiveInterpreter.__init__()
+ exitmsg -- passed to InteractiveConsole.interact()
+
+ """
+ console = InteractiveConsole(local)
+ if readfunc is not None:
+ console.raw_input = readfunc
+ else:
+ try:
+ import readline
+ except ImportError:
+ pass
+ console.interact(banner, exitmsg)
+
+
+if __name__ == "__main__":
+ import argparse
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-q', action='store_true',
+ help="don't print version and copyright messages")
+ args = parser.parse_args()
+ if args.q or sys.flags.quiet:
+ banner = ''
+ else:
+ banner = None
+ interact(banner)
diff --git a/dist/lib/codecs.py b/dist/lib/codecs.py
new file mode 100644
index 0000000..7f23e97
--- /dev/null
+++ b/dist/lib/codecs.py
@@ -0,0 +1,1126 @@
+""" codecs -- Python Codec Registry, API and helpers.
+
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+
+"""
+
+import builtins
+import sys
+
+### Registry and builtin stateless codec functions
+
+try:
+ from _codecs import *
+except ImportError as why:
+ raise SystemError('Failed to load the builtin codecs: %s' % why)
+
+__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
+ "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
+ "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
+ "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",
+ "CodecInfo", "Codec", "IncrementalEncoder", "IncrementalDecoder",
+ "StreamReader", "StreamWriter",
+ "StreamReaderWriter", "StreamRecoder",
+ "getencoder", "getdecoder", "getincrementalencoder",
+ "getincrementaldecoder", "getreader", "getwriter",
+ "encode", "decode", "iterencode", "iterdecode",
+ "strict_errors", "ignore_errors", "replace_errors",
+ "xmlcharrefreplace_errors",
+ "backslashreplace_errors", "namereplace_errors",
+ "register_error", "lookup_error"]
+
+### Constants
+
+#
+# Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF)
+# and its possible byte string values
+# for UTF8/UTF16/UTF32 output and little/big endian machines
+#
+
+# UTF-8
+BOM_UTF8 = b'\xef\xbb\xbf'
+
+# UTF-16, little endian
+BOM_LE = BOM_UTF16_LE = b'\xff\xfe'
+
+# UTF-16, big endian
+BOM_BE = BOM_UTF16_BE = b'\xfe\xff'
+
+# UTF-32, little endian
+BOM_UTF32_LE = b'\xff\xfe\x00\x00'
+
+# UTF-32, big endian
+BOM_UTF32_BE = b'\x00\x00\xfe\xff'
+
+if sys.byteorder == 'little':
+
+ # UTF-16, native endianness
+ BOM = BOM_UTF16 = BOM_UTF16_LE
+
+ # UTF-32, native endianness
+ BOM_UTF32 = BOM_UTF32_LE
+
+else:
+
+ # UTF-16, native endianness
+ BOM = BOM_UTF16 = BOM_UTF16_BE
+
+ # UTF-32, native endianness
+ BOM_UTF32 = BOM_UTF32_BE
+
+# Old broken names (don't use in new code)
+BOM32_LE = BOM_UTF16_LE
+BOM32_BE = BOM_UTF16_BE
+BOM64_LE = BOM_UTF32_LE
+BOM64_BE = BOM_UTF32_BE
+
+
+### Codec base classes (defining the API)
+
+class CodecInfo(tuple):
+ """Codec details when looking up the codec registry"""
+
+ # Private API to allow Python 3.4 to blacklist the known non-Unicode
+ # codecs in the standard library. A more general mechanism to
+ # reliably distinguish test encodings from other codecs will hopefully
+ # be defined for Python 3.5
+ #
+ # See http://bugs.python.org/issue19619
+ _is_text_encoding = True # Assume codecs are text encodings by default
+
+ def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
+ incrementalencoder=None, incrementaldecoder=None, name=None,
+ *, _is_text_encoding=None):
+ self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
+ self.name = name
+ self.encode = encode
+ self.decode = decode
+ self.incrementalencoder = incrementalencoder
+ self.incrementaldecoder = incrementaldecoder
+ self.streamwriter = streamwriter
+ self.streamreader = streamreader
+ if _is_text_encoding is not None:
+ self._is_text_encoding = _is_text_encoding
+ return self
+
+ def __repr__(self):
+ return "<%s.%s object for encoding %s at %#x>" % \
+ (self.__class__.__module__, self.__class__.__qualname__,
+ self.name, id(self))
+
+class Codec:
+
+ """ Defines the interface for stateless encoders/decoders.
+
+ The .encode()/.decode() methods may use different error
+ handling schemes by providing the errors argument. These
+ string values are predefined:
+
+ 'strict' - raise a ValueError error (or a subclass)
+ 'ignore' - ignore the character and continue with the next
+ 'replace' - replace with a suitable replacement character;
+ Python will use the official U+FFFD REPLACEMENT
+ CHARACTER for the builtin Unicode codecs on
+ decoding and '?' on encoding.
+ 'surrogateescape' - replace with private code points U+DCnn.
+ 'xmlcharrefreplace' - Replace with the appropriate XML
+ character reference (only for encoding).
+ 'backslashreplace' - Replace with backslashed escape sequences.
+ 'namereplace' - Replace with \\N{...} escape sequences
+ (only for encoding).
+
+ The set of allowed values can be extended via register_error.
+
+ """
+ def encode(self, input, errors='strict'):
+
+ """ Encodes the object input and returns a tuple (output
+ object, length consumed).
+
+ errors defines the error handling to apply. It defaults to
+ 'strict' handling.
+
+ The method may not store state in the Codec instance. Use
+ StreamWriter for codecs which have to keep state in order to
+ make encoding efficient.
+
+ The encoder must be able to handle zero length input and
+ return an empty object of the output object type in this
+ situation.
+
+ """
+ raise NotImplementedError
+
+ def decode(self, input, errors='strict'):
+
+ """ Decodes the object input and returns a tuple (output
+ object, length consumed).
+
+ input must be an object which provides the bf_getreadbuf
+ buffer slot. Python strings, buffer objects and memory
+ mapped files are examples of objects providing this slot.
+
+ errors defines the error handling to apply. It defaults to
+ 'strict' handling.
+
+ The method may not store state in the Codec instance. Use
+ StreamReader for codecs which have to keep state in order to
+ make decoding efficient.
+
+ The decoder must be able to handle zero length input and
+ return an empty object of the output object type in this
+ situation.
+
+ """
+ raise NotImplementedError
+
+class IncrementalEncoder(object):
+ """
+ An IncrementalEncoder encodes an input in multiple steps. The input can
+ be passed piece by piece to the encode() method. The IncrementalEncoder
+ remembers the state of the encoding process between calls to encode().
+ """
+ def __init__(self, errors='strict'):
+ """
+ Creates an IncrementalEncoder instance.
+
+ The IncrementalEncoder may use different error handling schemes by
+ providing the errors keyword argument. See the module docstring
+ for a list of possible values.
+ """
+ self.errors = errors
+ self.buffer = ""
+
+ def encode(self, input, final=False):
+ """
+ Encodes input and returns the resulting object.
+ """
+ raise NotImplementedError
+
+ def reset(self):
+ """
+ Resets the encoder to the initial state.
+ """
+
+ def getstate(self):
+ """
+ Return the current state of the encoder.
+ """
+ return 0
+
+ def setstate(self, state):
+ """
+ Set the current state of the encoder. state must have been
+ returned by getstate().
+ """
+
+class BufferedIncrementalEncoder(IncrementalEncoder):
+ """
+ This subclass of IncrementalEncoder can be used as the baseclass for an
+ incremental encoder if the encoder must keep some of the output in a
+ buffer between calls to encode().
+ """
+ def __init__(self, errors='strict'):
+ IncrementalEncoder.__init__(self, errors)
+ # unencoded input that is kept between calls to encode()
+ self.buffer = ""
+
+ def _buffer_encode(self, input, errors, final):
+ # Overwrite this method in subclasses: It must encode input
+ # and return an (output, length consumed) tuple
+ raise NotImplementedError
+
+ def encode(self, input, final=False):
+ # encode input (taking the buffer into account)
+ data = self.buffer + input
+ (result, consumed) = self._buffer_encode(data, self.errors, final)
+ # keep unencoded input until the next call
+ self.buffer = data[consumed:]
+ return result
+
+ def reset(self):
+ IncrementalEncoder.reset(self)
+ self.buffer = ""
+
+ def getstate(self):
+ return self.buffer or 0
+
+ def setstate(self, state):
+ self.buffer = state or ""
+
+class IncrementalDecoder(object):
+ """
+ An IncrementalDecoder decodes an input in multiple steps. The input can
+ be passed piece by piece to the decode() method. The IncrementalDecoder
+ remembers the state of the decoding process between calls to decode().
+ """
+ def __init__(self, errors='strict'):
+ """
+ Create an IncrementalDecoder instance.
+
+ The IncrementalDecoder may use different error handling schemes by
+ providing the errors keyword argument. See the module docstring
+ for a list of possible values.
+ """
+ self.errors = errors
+
+ def decode(self, input, final=False):
+ """
+ Decode input and returns the resulting object.
+ """
+ raise NotImplementedError
+
+ def reset(self):
+ """
+ Reset the decoder to the initial state.
+ """
+
+ def getstate(self):
+ """
+ Return the current state of the decoder.
+
+ This must be a (buffered_input, additional_state_info) tuple.
+ buffered_input must be a bytes object containing bytes that
+ were passed to decode() that have not yet been converted.
+ additional_state_info must be a non-negative integer
+ representing the state of the decoder WITHOUT yet having
+ processed the contents of buffered_input. In the initial state
+ and after reset(), getstate() must return (b"", 0).
+ """
+ return (b"", 0)
+
+ def setstate(self, state):
+ """
+ Set the current state of the decoder.
+
+ state must have been returned by getstate(). The effect of
+ setstate((b"", 0)) must be equivalent to reset().
+ """
+
+class BufferedIncrementalDecoder(IncrementalDecoder):
+ """
+ This subclass of IncrementalDecoder can be used as the baseclass for an
+ incremental decoder if the decoder must be able to handle incomplete
+ byte sequences.
+ """
+ def __init__(self, errors='strict'):
+ IncrementalDecoder.__init__(self, errors)
+ # undecoded input that is kept between calls to decode()
+ self.buffer = b""
+
+ def _buffer_decode(self, input, errors, final):
+ # Overwrite this method in subclasses: It must decode input
+ # and return an (output, length consumed) tuple
+ raise NotImplementedError
+
+ def decode(self, input, final=False):
+ # decode input (taking the buffer into account)
+ data = self.buffer + input
+ (result, consumed) = self._buffer_decode(data, self.errors, final)
+ # keep undecoded input until the next call
+ self.buffer = data[consumed:]
+ return result
+
+ def reset(self):
+ IncrementalDecoder.reset(self)
+ self.buffer = b""
+
+ def getstate(self):
+ # additional state info is always 0
+ return (self.buffer, 0)
+
+ def setstate(self, state):
+ # ignore additional state info
+ self.buffer = state[0]
+
+#
+# The StreamWriter and StreamReader class provide generic working
+# interfaces which can be used to implement new encoding submodules
+# very easily. See encodings/utf_8.py for an example on how this is
+# done.
+#
+
+class StreamWriter(Codec):
+
+ def __init__(self, stream, errors='strict'):
+
+ """ Creates a StreamWriter instance.
+
+ stream must be a file-like object open for writing.
+
+ The StreamWriter may use different error handling
+ schemes by providing the errors keyword argument. These
+ parameters are predefined:
+
+ 'strict' - raise a ValueError (or a subclass)
+ 'ignore' - ignore the character and continue with the next
+ 'replace'- replace with a suitable replacement character
+ 'xmlcharrefreplace' - Replace with the appropriate XML
+ character reference.
+ 'backslashreplace' - Replace with backslashed escape
+ sequences.
+ 'namereplace' - Replace with \\N{...} escape sequences.
+
+ The set of allowed parameter values can be extended via
+ register_error.
+ """
+ self.stream = stream
+ self.errors = errors
+
+ def write(self, object):
+
+ """ Writes the object's contents encoded to self.stream.
+ """
+ data, consumed = self.encode(object, self.errors)
+ self.stream.write(data)
+
+ def writelines(self, list):
+
+ """ Writes the concatenated list of strings to the stream
+ using .write().
+ """
+ self.write(''.join(list))
+
+ def reset(self):
+
+ """ Flushes and resets the codec buffers used for keeping state.
+
+ Calling this method should ensure that the data on the
+ output is put into a clean state, that allows appending
+ of new fresh data without having to rescan the whole
+ stream to recover state.
+
+ """
+ pass
+
+ def seek(self, offset, whence=0):
+ self.stream.seek(offset, whence)
+ if whence == 0 and offset == 0:
+ self.reset()
+
+ def __getattr__(self, name,
+ getattr=getattr):
+
+ """ Inherit all other methods from the underlying stream.
+ """
+ return getattr(self.stream, name)
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, type, value, tb):
+ self.stream.close()
+
+###
+
+class StreamReader(Codec):
+
+ charbuffertype = str
+
+ def __init__(self, stream, errors='strict'):
+
+ """ Creates a StreamReader instance.
+
+ stream must be a file-like object open for reading.
+
+ The StreamReader may use different error handling
+ schemes by providing the errors keyword argument. These
+ parameters are predefined:
+
+ 'strict' - raise a ValueError (or a subclass)
+ 'ignore' - ignore the character and continue with the next
+ 'replace'- replace with a suitable replacement character
+ 'backslashreplace' - Replace with backslashed escape sequences;
+
+ The set of allowed parameter values can be extended via
+ register_error.
+ """
+ self.stream = stream
+ self.errors = errors
+ self.bytebuffer = b""
+ self._empty_charbuffer = self.charbuffertype()
+ self.charbuffer = self._empty_charbuffer
+ self.linebuffer = None
+
+ def decode(self, input, errors='strict'):
+ raise NotImplementedError
+
+ def read(self, size=-1, chars=-1, firstline=False):
+
+ """ Decodes data from the stream self.stream and returns the
+ resulting object.
+
+ chars indicates the number of decoded code points or bytes to
+ return. read() will never return more data than requested,
+ but it might return less, if there is not enough available.
+
+ size indicates the approximate maximum number of decoded
+ bytes or code points to read for decoding. The decoder
+ can modify this setting as appropriate. The default value
+ -1 indicates to read and decode as much as possible. size
+ is intended to prevent having to decode huge files in one
+ step.
+
+ If firstline is true, and a UnicodeDecodeError happens
+ after the first line terminator in the input only the first line
+ will be returned, the rest of the input will be kept until the
+ next call to read().
+
+ The method should use a greedy read strategy, meaning that
+ it should read as much data as is allowed within the
+ definition of the encoding and the given size, e.g. if
+ optional encoding endings or state markers are available
+ on the stream, these should be read too.
+ """
+ # If we have lines cached, first merge them back into characters
+ if self.linebuffer:
+ self.charbuffer = self._empty_charbuffer.join(self.linebuffer)
+ self.linebuffer = None
+
+ if chars < 0:
+ # For compatibility with other read() methods that take a
+ # single argument
+ chars = size
+
+ # read until we get the required number of characters (if available)
+ while True:
+ # can the request be satisfied from the character buffer?
+ if chars >= 0:
+ if len(self.charbuffer) >= chars:
+ break
+ # we need more data
+ if size < 0:
+ newdata = self.stream.read()
+ else:
+ newdata = self.stream.read(size)
+ # decode bytes (those remaining from the last call included)
+ data = self.bytebuffer + newdata
+ if not data:
+ break
+ try:
+ newchars, decodedbytes = self.decode(data, self.errors)
+ except UnicodeDecodeError as exc:
+ if firstline:
+ newchars, decodedbytes = \
+ self.decode(data[:exc.start], self.errors)
+ lines = newchars.splitlines(keepends=True)
+ if len(lines)<=1:
+ raise
+ else:
+ raise
+ # keep undecoded bytes until the next call
+ self.bytebuffer = data[decodedbytes:]
+ # put new characters in the character buffer
+ self.charbuffer += newchars
+ # there was no data available
+ if not newdata:
+ break
+ if chars < 0:
+ # Return everything we've got
+ result = self.charbuffer
+ self.charbuffer = self._empty_charbuffer
+ else:
+ # Return the first chars characters
+ result = self.charbuffer[:chars]
+ self.charbuffer = self.charbuffer[chars:]
+ return result
+
+ def readline(self, size=None, keepends=True):
+
+ """ Read one line from the input stream and return the
+ decoded data.
+
+ size, if given, is passed as size argument to the
+ read() method.
+
+ """
+ # If we have lines cached from an earlier read, return
+ # them unconditionally
+ if self.linebuffer:
+ line = self.linebuffer[0]
+ del self.linebuffer[0]
+ if len(self.linebuffer) == 1:
+ # revert to charbuffer mode; we might need more data
+ # next time
+ self.charbuffer = self.linebuffer[0]
+ self.linebuffer = None
+ if not keepends:
+ line = line.splitlines(keepends=False)[0]
+ return line
+
+ readsize = size or 72
+ line = self._empty_charbuffer
+ # If size is given, we call read() only once
+ while True:
+ data = self.read(readsize, firstline=True)
+ if data:
+ # If we're at a "\r" read one extra character (which might
+ # be a "\n") to get a proper line ending. If the stream is
+ # temporarily exhausted we return the wrong line ending.
+ if (isinstance(data, str) and data.endswith("\r")) or \
+ (isinstance(data, bytes) and data.endswith(b"\r")):
+ data += self.read(size=1, chars=1)
+
+ line += data
+ lines = line.splitlines(keepends=True)
+ if lines:
+ if len(lines) > 1:
+ # More than one line result; the first line is a full line
+ # to return
+ line = lines[0]
+ del lines[0]
+ if len(lines) > 1:
+ # cache the remaining lines
+ lines[-1] += self.charbuffer
+ self.linebuffer = lines
+ self.charbuffer = None
+ else:
+ # only one remaining line, put it back into charbuffer
+ self.charbuffer = lines[0] + self.charbuffer
+ if not keepends:
+ line = line.splitlines(keepends=False)[0]
+ break
+ line0withend = lines[0]
+ line0withoutend = lines[0].splitlines(keepends=False)[0]
+ if line0withend != line0withoutend: # We really have a line end
+ # Put the rest back together and keep it until the next call
+ self.charbuffer = self._empty_charbuffer.join(lines[1:]) + \
+ self.charbuffer
+ if keepends:
+ line = line0withend
+ else:
+ line = line0withoutend
+ break
+ # we didn't get anything or this was our only try
+ if not data or size is not None:
+ if line and not keepends:
+ line = line.splitlines(keepends=False)[0]
+ break
+ if readsize < 8000:
+ readsize *= 2
+ return line
+
+ def readlines(self, sizehint=None, keepends=True):
+
+ """ Read all lines available on the input stream
+ and return them as a list.
+
+ Line breaks are implemented using the codec's decoder
+ method and are included in the list entries.
+
+ sizehint, if given, is ignored since there is no efficient
+ way to finding the true end-of-line.
+
+ """
+ data = self.read()
+ return data.splitlines(keepends)
+
+ def reset(self):
+
+ """ Resets the codec buffers used for keeping state.
+
+ Note that no stream repositioning should take place.
+ This method is primarily intended to be able to recover
+ from decoding errors.
+
+ """
+ self.bytebuffer = b""
+ self.charbuffer = self._empty_charbuffer
+ self.linebuffer = None
+
+ def seek(self, offset, whence=0):
+ """ Set the input stream's current position.
+
+ Resets the codec buffers used for keeping state.
+ """
+ self.stream.seek(offset, whence)
+ self.reset()
+
+ def __next__(self):
+
+ """ Return the next decoded line from the input stream."""
+ line = self.readline()
+ if line:
+ return line
+ raise StopIteration
+
+ def __iter__(self):
+ return self
+
+ def __getattr__(self, name,
+ getattr=getattr):
+
+ """ Inherit all other methods from the underlying stream.
+ """
+ return getattr(self.stream, name)
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, type, value, tb):
+ self.stream.close()
+
+###
+
+class StreamReaderWriter:
+
+ """ StreamReaderWriter instances allow wrapping streams which
+ work in both read and write modes.
+
+ The design is such that one can use the factory functions
+ returned by the codec.lookup() function to construct the
+ instance.
+
+ """
+ # Optional attributes set by the file wrappers below
+ encoding = 'unknown'
+
+ def __init__(self, stream, Reader, Writer, errors='strict'):
+
+ """ Creates a StreamReaderWriter instance.
+
+ stream must be a Stream-like object.
+
+ Reader, Writer must be factory functions or classes
+ providing the StreamReader, StreamWriter interface resp.
+
+ Error handling is done in the same way as defined for the
+ StreamWriter/Readers.
+
+ """
+ self.stream = stream
+ self.reader = Reader(stream, errors)
+ self.writer = Writer(stream, errors)
+ self.errors = errors
+
+ def read(self, size=-1):
+
+ return self.reader.read(size)
+
+ def readline(self, size=None):
+
+ return self.reader.readline(size)
+
+ def readlines(self, sizehint=None):
+
+ return self.reader.readlines(sizehint)
+
+ def __next__(self):
+
+ """ Return the next decoded line from the input stream."""
+ return next(self.reader)
+
+ def __iter__(self):
+ return self
+
+ def write(self, data):
+
+ return self.writer.write(data)
+
+ def writelines(self, list):
+
+ return self.writer.writelines(list)
+
+ def reset(self):
+
+ self.reader.reset()
+ self.writer.reset()
+
+ def seek(self, offset, whence=0):
+ self.stream.seek(offset, whence)
+ self.reader.reset()
+ if whence == 0 and offset == 0:
+ self.writer.reset()
+
+ def __getattr__(self, name,
+ getattr=getattr):
+
+ """ Inherit all other methods from the underlying stream.
+ """
+ return getattr(self.stream, name)
+
+ # these are needed to make "with StreamReaderWriter(...)" work properly
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, type, value, tb):
+ self.stream.close()
+
+###
+
+class StreamRecoder:
+
+ """ StreamRecoder instances translate data from one encoding to another.
+
+ They use the complete set of APIs returned by the
+ codecs.lookup() function to implement their task.
+
+ Data written to the StreamRecoder is first decoded into an
+ intermediate format (depending on the "decode" codec) and then
+ written to the underlying stream using an instance of the provided
+ Writer class.
+
+ In the other direction, data is read from the underlying stream using
+ a Reader instance and then encoded and returned to the caller.
+
+ """
+ # Optional attributes set by the file wrappers below
+ data_encoding = 'unknown'
+ file_encoding = 'unknown'
+
+ def __init__(self, stream, encode, decode, Reader, Writer,
+ errors='strict'):
+
+ """ Creates a StreamRecoder instance which implements a two-way
+ conversion: encode and decode work on the frontend (the
+ data visible to .read() and .write()) while Reader and Writer
+ work on the backend (the data in stream).
+
+ You can use these objects to do transparent
+ transcodings from e.g. latin-1 to utf-8 and back.
+
+ stream must be a file-like object.
+
+ encode and decode must adhere to the Codec interface; Reader and
+ Writer must be factory functions or classes providing the
+ StreamReader and StreamWriter interfaces resp.
+
+ Error handling is done in the same way as defined for the
+ StreamWriter/Readers.
+
+ """
+ self.stream = stream
+ self.encode = encode
+ self.decode = decode
+ self.reader = Reader(stream, errors)
+ self.writer = Writer(stream, errors)
+ self.errors = errors
+
+ def read(self, size=-1):
+
+ data = self.reader.read(size)
+ data, bytesencoded = self.encode(data, self.errors)
+ return data
+
+ def readline(self, size=None):
+
+ if size is None:
+ data = self.reader.readline()
+ else:
+ data = self.reader.readline(size)
+ data, bytesencoded = self.encode(data, self.errors)
+ return data
+
+ def readlines(self, sizehint=None):
+
+ data = self.reader.read()
+ data, bytesencoded = self.encode(data, self.errors)
+ return data.splitlines(keepends=True)
+
+ def __next__(self):
+
+ """ Return the next decoded line from the input stream."""
+ data = next(self.reader)
+ data, bytesencoded = self.encode(data, self.errors)
+ return data
+
+ def __iter__(self):
+ return self
+
+ def write(self, data):
+
+ data, bytesdecoded = self.decode(data, self.errors)
+ return self.writer.write(data)
+
+ def writelines(self, list):
+
+ data = b''.join(list)
+ data, bytesdecoded = self.decode(data, self.errors)
+ return self.writer.write(data)
+
+ def reset(self):
+
+ self.reader.reset()
+ self.writer.reset()
+
+ def seek(self, offset, whence=0):
+ # Seeks must be propagated to both the readers and writers
+ # as they might need to reset their internal buffers.
+ self.reader.seek(offset, whence)
+ self.writer.seek(offset, whence)
+
+ def __getattr__(self, name,
+ getattr=getattr):
+
+ """ Inherit all other methods from the underlying stream.
+ """
+ return getattr(self.stream, name)
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, type, value, tb):
+ self.stream.close()
+
+### Shortcuts
+
+def open(filename, mode='r', encoding=None, errors='strict', buffering=-1):
+
+ """ Open an encoded file using the given mode and return
+ a wrapped version providing transparent encoding/decoding.
+
+ Note: The wrapped version will only accept the object format
+ defined by the codecs, i.e. Unicode objects for most builtin
+ codecs. Output is also codec dependent and will usually be
+ Unicode as well.
+
+ Underlying encoded files are always opened in binary mode.
+ The default file mode is 'r', meaning to open the file in read mode.
+
+ encoding specifies the encoding which is to be used for the
+ file.
+
+ errors may be given to define the error handling. It defaults
+ to 'strict' which causes ValueErrors to be raised in case an
+ encoding error occurs.
+
+ buffering has the same meaning as for the builtin open() API.
+ It defaults to -1 which means that the default buffer size will
+ be used.
+
+ The returned wrapped file object provides an extra attribute
+ .encoding which allows querying the used encoding. This
+ attribute is only available if an encoding was specified as
+ parameter.
+
+ """
+ if encoding is not None and \
+ 'b' not in mode:
+ # Force opening of the file in binary mode
+ mode = mode + 'b'
+ file = builtins.open(filename, mode, buffering)
+ if encoding is None:
+ return file
+
+ try:
+ info = lookup(encoding)
+ srw = StreamReaderWriter(file, info.streamreader, info.streamwriter, errors)
+ # Add attributes to simplify introspection
+ srw.encoding = encoding
+ return srw
+ except:
+ file.close()
+ raise
+
+def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):
+
+ """ Return a wrapped version of file which provides transparent
+ encoding translation.
+
+ Data written to the wrapped file is decoded according
+ to the given data_encoding and then encoded to the underlying
+ file using file_encoding. The intermediate data type
+ will usually be Unicode but depends on the specified codecs.
+
+ Bytes read from the file are decoded using file_encoding and then
+ passed back to the caller encoded using data_encoding.
+
+ If file_encoding is not given, it defaults to data_encoding.
+
+ errors may be given to define the error handling. It defaults
+ to 'strict' which causes ValueErrors to be raised in case an
+ encoding error occurs.
+
+ The returned wrapped file object provides two extra attributes
+ .data_encoding and .file_encoding which reflect the given
+ parameters of the same name. The attributes can be used for
+ introspection by Python programs.
+
+ """
+ if file_encoding is None:
+ file_encoding = data_encoding
+ data_info = lookup(data_encoding)
+ file_info = lookup(file_encoding)
+ sr = StreamRecoder(file, data_info.encode, data_info.decode,
+ file_info.streamreader, file_info.streamwriter, errors)
+ # Add attributes to simplify introspection
+ sr.data_encoding = data_encoding
+ sr.file_encoding = file_encoding
+ return sr
+
+### Helpers for codec lookup
+
+def getencoder(encoding):
+
+ """ Lookup up the codec for the given encoding and return
+ its encoder function.
+
+ Raises a LookupError in case the encoding cannot be found.
+
+ """
+ return lookup(encoding).encode
+
+def getdecoder(encoding):
+
+ """ Lookup up the codec for the given encoding and return
+ its decoder function.
+
+ Raises a LookupError in case the encoding cannot be found.
+
+ """
+ return lookup(encoding).decode
+
+def getincrementalencoder(encoding):
+
+ """ Lookup up the codec for the given encoding and return
+ its IncrementalEncoder class or factory function.
+
+ Raises a LookupError in case the encoding cannot be found
+ or the codecs doesn't provide an incremental encoder.
+
+ """
+ encoder = lookup(encoding).incrementalencoder
+ if encoder is None:
+ raise LookupError(encoding)
+ return encoder
+
+def getincrementaldecoder(encoding):
+
+ """ Lookup up the codec for the given encoding and return
+ its IncrementalDecoder class or factory function.
+
+ Raises a LookupError in case the encoding cannot be found
+ or the codecs doesn't provide an incremental decoder.
+
+ """
+ decoder = lookup(encoding).incrementaldecoder
+ if decoder is None:
+ raise LookupError(encoding)
+ return decoder
+
+def getreader(encoding):
+
+ """ Lookup up the codec for the given encoding and return
+ its StreamReader class or factory function.
+
+ Raises a LookupError in case the encoding cannot be found.
+
+ """
+ return lookup(encoding).streamreader
+
+def getwriter(encoding):
+
+ """ Lookup up the codec for the given encoding and return
+ its StreamWriter class or factory function.
+
+ Raises a LookupError in case the encoding cannot be found.
+
+ """
+ return lookup(encoding).streamwriter
+
+def iterencode(iterator, encoding, errors='strict', **kwargs):
+ """
+ Encoding iterator.
+
+ Encodes the input strings from the iterator using an IncrementalEncoder.
+
+ errors and kwargs are passed through to the IncrementalEncoder
+ constructor.
+ """
+ encoder = getincrementalencoder(encoding)(errors, **kwargs)
+ for input in iterator:
+ output = encoder.encode(input)
+ if output:
+ yield output
+ output = encoder.encode("", True)
+ if output:
+ yield output
+
+def iterdecode(iterator, encoding, errors='strict', **kwargs):
+ """
+ Decoding iterator.
+
+ Decodes the input strings from the iterator using an IncrementalDecoder.
+
+ errors and kwargs are passed through to the IncrementalDecoder
+ constructor.
+ """
+ decoder = getincrementaldecoder(encoding)(errors, **kwargs)
+ for input in iterator:
+ output = decoder.decode(input)
+ if output:
+ yield output
+ output = decoder.decode(b"", True)
+ if output:
+ yield output
+
+### Helpers for charmap-based codecs
+
+def make_identity_dict(rng):
+
+ """ make_identity_dict(rng) -> dict
+
+ Return a dictionary where elements of the rng sequence are
+ mapped to themselves.
+
+ """
+ return {i:i for i in rng}
+
+def make_encoding_map(decoding_map):
+
+ """ Creates an encoding map from a decoding map.
+
+ If a target mapping in the decoding map occurs multiple
+ times, then that target is mapped to None (undefined mapping),
+ causing an exception when encountered by the charmap codec
+ during translation.
+
+ One example where this happens is cp875.py which decodes
+ multiple character to \\u001a.
+
+ """
+ m = {}
+ for k,v in decoding_map.items():
+ if not v in m:
+ m[v] = k
+ else:
+ m[v] = None
+ return m
+
+### error handlers
+
+try:
+ strict_errors = lookup_error("strict")
+ ignore_errors = lookup_error("ignore")
+ replace_errors = lookup_error("replace")
+ xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace")
+ backslashreplace_errors = lookup_error("backslashreplace")
+ namereplace_errors = lookup_error("namereplace")
+except LookupError:
+ # In --disable-unicode builds, these error handler are missing
+ strict_errors = None
+ ignore_errors = None
+ replace_errors = None
+ xmlcharrefreplace_errors = None
+ backslashreplace_errors = None
+ namereplace_errors = None
+
+# Tell modulefinder that using codecs probably needs the encodings
+# package
+_false = 0
+if _false:
+ import encodings
+
+### Tests
+
+if __name__ == '__main__':
+
+ # Make stdout translate Latin-1 output into UTF-8 output
+ sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8')
+
+ # Have stdin translate Latin-1 input into UTF-8 input
+ sys.stdin = EncodedFile(sys.stdin, 'utf-8', 'latin-1')
diff --git a/dist/lib/codeop.py b/dist/lib/codeop.py
new file mode 100644
index 0000000..3c2bb60
--- /dev/null
+++ b/dist/lib/codeop.py
@@ -0,0 +1,176 @@
+r"""Utilities to compile possibly incomplete Python source code.
+
+This module provides two interfaces, broadly similar to the builtin
+function compile(), which take program text, a filename and a 'mode'
+and:
+
+- Return code object if the command is complete and valid
+- Return None if the command is incomplete
+- Raise SyntaxError, ValueError or OverflowError if the command is a
+ syntax error (OverflowError and ValueError can be produced by
+ malformed literals).
+
+Approach:
+
+First, check if the source consists entirely of blank lines and
+comments; if so, replace it with 'pass', because the built-in
+parser doesn't always do the right thing for these.
+
+Compile three times: as is, with \n, and with \n\n appended. If it
+compiles as is, it's complete. If it compiles with one \n appended,
+we expect more. If it doesn't compile either way, we compare the
+error we get when compiling with \n or \n\n appended. If the errors
+are the same, the code is broken. But if the errors are different, we
+expect more. Not intuitive; not even guaranteed to hold in future
+releases; but this matches the compiler's behavior from Python 1.4
+through 2.2, at least.
+
+Caveat:
+
+It is possible (but not likely) that the parser stops parsing with a
+successful outcome before reaching the end of the source; in this
+case, trailing symbols may be ignored instead of causing an error.
+For example, a backslash followed by two newlines may be followed by
+arbitrary garbage. This will be fixed once the API for the parser is
+better.
+
+The two interfaces are:
+
+compile_command(source, filename, symbol):
+
+ Compiles a single command in the manner described above.
+
+CommandCompiler():
+
+ Instances of this class have __call__ methods identical in
+ signature to compile_command; the difference is that if the
+ instance compiles program text containing a __future__ statement,
+ the instance 'remembers' and compiles all subsequent program texts
+ with the statement in force.
+
+The module also provides another class:
+
+Compile():
+
+ Instances of this class act like the built-in function compile,
+ but with 'memory' in the sense described above.
+"""
+
+import __future__
+import warnings
+
+_features = [getattr(__future__, fname)
+ for fname in __future__.all_feature_names]
+
+__all__ = ["compile_command", "Compile", "CommandCompiler"]
+
+PyCF_DONT_IMPLY_DEDENT = 0x200 # Matches pythonrun.h
+
+def _maybe_compile(compiler, source, filename, symbol):
+ # Check for source consisting of only blank lines and comments
+ for line in source.split("\n"):
+ line = line.strip()
+ if line and line[0] != '#':
+ break # Leave it alone
+ else:
+ if symbol != "eval":
+ source = "pass" # Replace it with a 'pass' statement
+
+ err = err1 = err2 = None
+ code = code1 = code2 = None
+
+ try:
+ code = compiler(source, filename, symbol)
+ except SyntaxError as err:
+ pass
+
+ # Suppress warnings after the first compile to avoid duplication.
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore")
+ try:
+ code1 = compiler(source + "\n", filename, symbol)
+ except SyntaxError as e:
+ err1 = e
+
+ try:
+ code2 = compiler(source + "\n\n", filename, symbol)
+ except SyntaxError as e:
+ err2 = e
+
+ try:
+ if code:
+ return code
+ if not code1 and repr(err1) == repr(err2):
+ raise err1
+ finally:
+ err1 = err2 = None
+
+def _compile(source, filename, symbol):
+ return compile(source, filename, symbol, PyCF_DONT_IMPLY_DEDENT)
+
+def compile_command(source, filename=" ", symbol="single"):
+ r"""Compile a command and determine whether it is incomplete.
+
+ Arguments:
+
+ source -- the source string; may contain \n characters
+ filename -- optional filename from which source was read; default
+ " "
+ symbol -- optional grammar start symbol; "single" (default), "exec"
+ or "eval"
+
+ Return value / exceptions raised:
+
+ - Return a code object if the command is complete and valid
+ - Return None if the command is incomplete
+ - Raise SyntaxError, ValueError or OverflowError if the command is a
+ syntax error (OverflowError and ValueError can be produced by
+ malformed literals).
+ """
+ return _maybe_compile(_compile, source, filename, symbol)
+
+class Compile:
+ """Instances of this class behave much like the built-in compile
+ function, but if one is used to compile text containing a future
+ statement, it "remembers" and compiles all subsequent program texts
+ with the statement in force."""
+ def __init__(self):
+ self.flags = PyCF_DONT_IMPLY_DEDENT
+
+ def __call__(self, source, filename, symbol):
+ codeob = compile(source, filename, symbol, self.flags, 1)
+ for feature in _features:
+ if codeob.co_flags & feature.compiler_flag:
+ self.flags |= feature.compiler_flag
+ return codeob
+
+class CommandCompiler:
+ """Instances of this class have __call__ methods identical in
+ signature to compile_command; the difference is that if the
+ instance compiles program text containing a __future__ statement,
+ the instance 'remembers' and compiles all subsequent program texts
+ with the statement in force."""
+
+ def __init__(self,):
+ self.compiler = Compile()
+
+ def __call__(self, source, filename=" ", symbol="single"):
+ r"""Compile a command and determine whether it is incomplete.
+
+ Arguments:
+
+ source -- the source string; may contain \n characters
+ filename -- optional filename from which source was read;
+ default " "
+ symbol -- optional grammar start symbol; "single" (default) or
+ "eval"
+
+ Return value / exceptions raised:
+
+ - Return a code object if the command is complete and valid
+ - Return None if the command is incomplete
+ - Raise SyntaxError, ValueError or OverflowError if the command is a
+ syntax error (OverflowError and ValueError can be produced by
+ malformed literals).
+ """
+ return _maybe_compile(self.compiler, source, filename, symbol)
diff --git a/dist/lib/collections/__init__.py b/dist/lib/collections/__init__.py
new file mode 100644
index 0000000..a78a47c
--- /dev/null
+++ b/dist/lib/collections/__init__.py
@@ -0,0 +1,1279 @@
+'''This module implements specialized container datatypes providing
+alternatives to Python's general purpose built-in containers, dict,
+list, set, and tuple.
+
+* namedtuple factory function for creating tuple subclasses with named fields
+* deque list-like container with fast appends and pops on either end
+* ChainMap dict-like class for creating a single view of multiple mappings
+* Counter dict subclass for counting hashable objects
+* OrderedDict dict subclass that remembers the order entries were added
+* defaultdict dict subclass that calls a factory function to supply missing values
+* UserDict wrapper around dictionary objects for easier dict subclassing
+* UserList wrapper around list objects for easier list subclassing
+* UserString wrapper around string objects for easier string subclassing
+
+'''
+
+__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
+ 'UserString', 'Counter', 'OrderedDict', 'ChainMap']
+
+import _collections_abc
+from operator import itemgetter as _itemgetter, eq as _eq
+from keyword import iskeyword as _iskeyword
+import sys as _sys
+import heapq as _heapq
+from _weakref import proxy as _proxy
+from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
+from reprlib import recursive_repr as _recursive_repr
+
+try:
+ from _collections import deque
+except ImportError:
+ pass
+else:
+ _collections_abc.MutableSequence.register(deque)
+
+try:
+ from _collections import defaultdict
+except ImportError:
+ pass
+
+
+def __getattr__(name):
+ # For backwards compatibility, continue to make the collections ABCs
+ # through Python 3.6 available through the collections module.
+ # Note, no new collections ABCs were added in Python 3.7
+ if name in _collections_abc.__all__:
+ obj = getattr(_collections_abc, name)
+ import warnings
+ warnings.warn("Using or importing the ABCs from 'collections' instead "
+ "of from 'collections.abc' is deprecated since Python 3.3, "
+ "and in 3.9 it will stop working",
+ DeprecationWarning, stacklevel=2)
+ globals()[name] = obj
+ return obj
+ raise AttributeError(f'module {__name__!r} has no attribute {name!r}')
+
+################################################################################
+### OrderedDict
+################################################################################
+
+class _OrderedDictKeysView(_collections_abc.KeysView):
+
+ def __reversed__(self):
+ yield from reversed(self._mapping)
+
+class _OrderedDictItemsView(_collections_abc.ItemsView):
+
+ def __reversed__(self):
+ for key in reversed(self._mapping):
+ yield (key, self._mapping[key])
+
+class _OrderedDictValuesView(_collections_abc.ValuesView):
+
+ def __reversed__(self):
+ for key in reversed(self._mapping):
+ yield self._mapping[key]
+
+class _Link(object):
+ __slots__ = 'prev', 'next', 'key', '__weakref__'
+
+class OrderedDict(dict):
+ 'Dictionary that remembers insertion order'
+ # An inherited dict maps keys to values.
+ # The inherited dict provides __getitem__, __len__, __contains__, and get.
+ # The remaining methods are order-aware.
+ # Big-O running times for all methods are the same as regular dictionaries.
+
+ # The internal self.__map dict maps keys to links in a doubly linked list.
+ # The circular doubly linked list starts and ends with a sentinel element.
+ # The sentinel element never gets deleted (this simplifies the algorithm).
+ # The sentinel is in self.__hardroot with a weakref proxy in self.__root.
+ # The prev links are weakref proxies (to prevent circular references).
+ # Individual links are kept alive by the hard reference in self.__map.
+ # Those hard references disappear when a key is deleted from an OrderedDict.
+
+ def __init__(self, other=(), /, **kwds):
+ '''Initialize an ordered dictionary. The signature is the same as
+ regular dictionaries. Keyword argument order is preserved.
+ '''
+ try:
+ self.__root
+ except AttributeError:
+ self.__hardroot = _Link()
+ self.__root = root = _proxy(self.__hardroot)
+ root.prev = root.next = root
+ self.__map = {}
+ self.__update(other, **kwds)
+
+ def __setitem__(self, key, value,
+ dict_setitem=dict.__setitem__, proxy=_proxy, Link=_Link):
+ 'od.__setitem__(i, y) <==> od[i]=y'
+ # Setting a new item creates a new link at the end of the linked list,
+ # and the inherited dictionary is updated with the new key/value pair.
+ if key not in self:
+ self.__map[key] = link = Link()
+ root = self.__root
+ last = root.prev
+ link.prev, link.next, link.key = last, root, key
+ last.next = link
+ root.prev = proxy(link)
+ dict_setitem(self, key, value)
+
+ def __delitem__(self, key, dict_delitem=dict.__delitem__):
+ 'od.__delitem__(y) <==> del od[y]'
+ # Deleting an existing item uses self.__map to find the link which gets
+ # removed by updating the links in the predecessor and successor nodes.
+ dict_delitem(self, key)
+ link = self.__map.pop(key)
+ link_prev = link.prev
+ link_next = link.next
+ link_prev.next = link_next
+ link_next.prev = link_prev
+ link.prev = None
+ link.next = None
+
+ def __iter__(self):
+ 'od.__iter__() <==> iter(od)'
+ # Traverse the linked list in order.
+ root = self.__root
+ curr = root.next
+ while curr is not root:
+ yield curr.key
+ curr = curr.next
+
+ def __reversed__(self):
+ 'od.__reversed__() <==> reversed(od)'
+ # Traverse the linked list in reverse order.
+ root = self.__root
+ curr = root.prev
+ while curr is not root:
+ yield curr.key
+ curr = curr.prev
+
+ def clear(self):
+ 'od.clear() -> None. Remove all items from od.'
+ root = self.__root
+ root.prev = root.next = root
+ self.__map.clear()
+ dict.clear(self)
+
+ def popitem(self, last=True):
+ '''Remove and return a (key, value) pair from the dictionary.
+
+ Pairs are returned in LIFO order if last is true or FIFO order if false.
+ '''
+ if not self:
+ raise KeyError('dictionary is empty')
+ root = self.__root
+ if last:
+ link = root.prev
+ link_prev = link.prev
+ link_prev.next = root
+ root.prev = link_prev
+ else:
+ link = root.next
+ link_next = link.next
+ root.next = link_next
+ link_next.prev = root
+ key = link.key
+ del self.__map[key]
+ value = dict.pop(self, key)
+ return key, value
+
+ def move_to_end(self, key, last=True):
+ '''Move an existing element to the end (or beginning if last is false).
+
+ Raise KeyError if the element does not exist.
+ '''
+ link = self.__map[key]
+ link_prev = link.prev
+ link_next = link.next
+ soft_link = link_next.prev
+ link_prev.next = link_next
+ link_next.prev = link_prev
+ root = self.__root
+ if last:
+ last = root.prev
+ link.prev = last
+ link.next = root
+ root.prev = soft_link
+ last.next = link
+ else:
+ first = root.next
+ link.prev = root
+ link.next = first
+ first.prev = soft_link
+ root.next = link
+
+ def __sizeof__(self):
+ sizeof = _sys.getsizeof
+ n = len(self) + 1 # number of links including root
+ size = sizeof(self.__dict__) # instance dictionary
+ size += sizeof(self.__map) * 2 # internal dict and inherited dict
+ size += sizeof(self.__hardroot) * n # link objects
+ size += sizeof(self.__root) * n # proxy objects
+ return size
+
+ update = __update = _collections_abc.MutableMapping.update
+
+ def keys(self):
+ "D.keys() -> a set-like object providing a view on D's keys"
+ return _OrderedDictKeysView(self)
+
+ def items(self):
+ "D.items() -> a set-like object providing a view on D's items"
+ return _OrderedDictItemsView(self)
+
+ def values(self):
+ "D.values() -> an object providing a view on D's values"
+ return _OrderedDictValuesView(self)
+
+ __ne__ = _collections_abc.MutableMapping.__ne__
+
+ __marker = object()
+
+ def pop(self, key, default=__marker):
+ '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
+ value. If key is not found, d is returned if given, otherwise KeyError
+ is raised.
+
+ '''
+ if key in self:
+ result = self[key]
+ del self[key]
+ return result
+ if default is self.__marker:
+ raise KeyError(key)
+ return default
+
+ def setdefault(self, key, default=None):
+ '''Insert key with a value of default if key is not in the dictionary.
+
+ Return the value for key if key is in the dictionary, else default.
+ '''
+ if key in self:
+ return self[key]
+ self[key] = default
+ return default
+
+ @_recursive_repr()
+ def __repr__(self):
+ 'od.__repr__() <==> repr(od)'
+ if not self:
+ return '%s()' % (self.__class__.__name__,)
+ return '%s(%r)' % (self.__class__.__name__, list(self.items()))
+
+ def __reduce__(self):
+ 'Return state information for pickling'
+ inst_dict = vars(self).copy()
+ for k in vars(OrderedDict()):
+ inst_dict.pop(k, None)
+ return self.__class__, (), inst_dict or None, None, iter(self.items())
+
+ def copy(self):
+ 'od.copy() -> a shallow copy of od'
+ return self.__class__(self)
+
+ @classmethod
+ def fromkeys(cls, iterable, value=None):
+ '''Create a new ordered dictionary with keys from iterable and values set to value.
+ '''
+ self = cls()
+ for key in iterable:
+ self[key] = value
+ return self
+
+ def __eq__(self, other):
+ '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
+ while comparison to a regular mapping is order-insensitive.
+
+ '''
+ if isinstance(other, OrderedDict):
+ return dict.__eq__(self, other) and all(map(_eq, self, other))
+ return dict.__eq__(self, other)
+
+
+try:
+ from _collections import OrderedDict
+except ImportError:
+ # Leave the pure Python version in place.
+ pass
+
+
+################################################################################
+### namedtuple
+################################################################################
+
+try:
+ from _collections import _tuplegetter
+except ImportError:
+ _tuplegetter = lambda index, doc: property(_itemgetter(index), doc=doc)
+
+def namedtuple(typename, field_names, *, rename=False, defaults=None, module=None):
+ """Returns a new subclass of tuple with named fields.
+
+ >>> Point = namedtuple('Point', ['x', 'y'])
+ >>> Point.__doc__ # docstring for the new class
+ 'Point(x, y)'
+ >>> p = Point(11, y=22) # instantiate with positional args or keywords
+ >>> p[0] + p[1] # indexable like a plain tuple
+ 33
+ >>> x, y = p # unpack like a regular tuple
+ >>> x, y
+ (11, 22)
+ >>> p.x + p.y # fields also accessible by name
+ 33
+ >>> d = p._asdict() # convert to a dictionary
+ >>> d['x']
+ 11
+ >>> Point(**d) # convert from a dictionary
+ Point(x=11, y=22)
+ >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
+ Point(x=100, y=22)
+
+ """
+
+ # Validate the field names. At the user's option, either generate an error
+ # message or automatically replace the field name with a valid name.
+ if isinstance(field_names, str):
+ field_names = field_names.replace(',', ' ').split()
+ field_names = list(map(str, field_names))
+ typename = _sys.intern(str(typename))
+
+ if rename:
+ seen = set()
+ for index, name in enumerate(field_names):
+ if (not name.isidentifier()
+ or _iskeyword(name)
+ or name.startswith('_')
+ or name in seen):
+ field_names[index] = f'_{index}'
+ seen.add(name)
+
+ for name in [typename] + field_names:
+ if type(name) is not str:
+ raise TypeError('Type names and field names must be strings')
+ if not name.isidentifier():
+ raise ValueError('Type names and field names must be valid '
+ f'identifiers: {name!r}')
+ if _iskeyword(name):
+ raise ValueError('Type names and field names cannot be a '
+ f'keyword: {name!r}')
+
+ seen = set()
+ for name in field_names:
+ if name.startswith('_') and not rename:
+ raise ValueError('Field names cannot start with an underscore: '
+ f'{name!r}')
+ if name in seen:
+ raise ValueError(f'Encountered duplicate field name: {name!r}')
+ seen.add(name)
+
+ field_defaults = {}
+ if defaults is not None:
+ defaults = tuple(defaults)
+ if len(defaults) > len(field_names):
+ raise TypeError('Got more default values than field names')
+ field_defaults = dict(reversed(list(zip(reversed(field_names),
+ reversed(defaults)))))
+
+ # Variables used in the methods and docstrings
+ field_names = tuple(map(_sys.intern, field_names))
+ num_fields = len(field_names)
+ arg_list = repr(field_names).replace("'", "")[1:-1]
+ repr_fmt = '(' + ', '.join(f'{name}=%r' for name in field_names) + ')'
+ tuple_new = tuple.__new__
+ _dict, _tuple, _len, _map, _zip = dict, tuple, len, map, zip
+
+ # Create all the named tuple methods to be added to the class namespace
+
+ s = f'def __new__(_cls, {arg_list}): return _tuple_new(_cls, ({arg_list}))'
+ namespace = {'_tuple_new': tuple_new, '__name__': f'namedtuple_{typename}'}
+ # Note: exec() has the side-effect of interning the field names
+ exec(s, namespace)
+ __new__ = namespace['__new__']
+ __new__.__doc__ = f'Create new instance of {typename}({arg_list})'
+ if defaults is not None:
+ __new__.__defaults__ = defaults
+
+ @classmethod
+ def _make(cls, iterable):
+ result = tuple_new(cls, iterable)
+ if _len(result) != num_fields:
+ raise TypeError(f'Expected {num_fields} arguments, got {len(result)}')
+ return result
+
+ _make.__func__.__doc__ = (f'Make a new {typename} object from a sequence '
+ 'or iterable')
+
+ def _replace(self, /, **kwds):
+ result = self._make(_map(kwds.pop, field_names, self))
+ if kwds:
+ raise ValueError(f'Got unexpected field names: {list(kwds)!r}')
+ return result
+
+ _replace.__doc__ = (f'Return a new {typename} object replacing specified '
+ 'fields with new values')
+
+ def __repr__(self):
+ 'Return a nicely formatted representation string'
+ return self.__class__.__name__ + repr_fmt % self
+
+ def _asdict(self):
+ 'Return a new dict which maps field names to their values.'
+ return _dict(_zip(self._fields, self))
+
+ def __getnewargs__(self):
+ 'Return self as a plain tuple. Used by copy and pickle.'
+ return _tuple(self)
+
+ # Modify function metadata to help with introspection and debugging
+ for method in (__new__, _make.__func__, _replace,
+ __repr__, _asdict, __getnewargs__):
+ method.__qualname__ = f'{typename}.{method.__name__}'
+
+ # Build-up the class namespace dictionary
+ # and use type() to build the result class
+ class_namespace = {
+ '__doc__': f'{typename}({arg_list})',
+ '__slots__': (),
+ '_fields': field_names,
+ '_field_defaults': field_defaults,
+ # alternate spelling for backward compatibility
+ '_fields_defaults': field_defaults,
+ '__new__': __new__,
+ '_make': _make,
+ '_replace': _replace,
+ '__repr__': __repr__,
+ '_asdict': _asdict,
+ '__getnewargs__': __getnewargs__,
+ }
+ for index, name in enumerate(field_names):
+ doc = _sys.intern(f'Alias for field number {index}')
+ class_namespace[name] = _tuplegetter(index, doc)
+
+ result = type(typename, (tuple,), class_namespace)
+
+ # For pickling to work, the __module__ variable needs to be set to the frame
+ # where the named tuple is created. Bypass this step in environments where
+ # sys._getframe is not defined (Jython for example) or sys._getframe is not
+ # defined for arguments greater than 0 (IronPython), or where the user has
+ # specified a particular module.
+ if module is None:
+ try:
+ module = _sys._getframe(1).f_globals.get('__name__', '__main__')
+ except (AttributeError, ValueError):
+ pass
+ if module is not None:
+ result.__module__ = module
+
+ return result
+
+
+########################################################################
+### Counter
+########################################################################
+
+def _count_elements(mapping, iterable):
+ 'Tally elements from the iterable.'
+ mapping_get = mapping.get
+ for elem in iterable:
+ mapping[elem] = mapping_get(elem, 0) + 1
+
+try: # Load C helper function if available
+ from _collections import _count_elements
+except ImportError:
+ pass
+
+class Counter(dict):
+ '''Dict subclass for counting hashable items. Sometimes called a bag
+ or multiset. Elements are stored as dictionary keys and their counts
+ are stored as dictionary values.
+
+ >>> c = Counter('abcdeabcdabcaba') # count elements from a string
+
+ >>> c.most_common(3) # three most common elements
+ [('a', 5), ('b', 4), ('c', 3)]
+ >>> sorted(c) # list all unique elements
+ ['a', 'b', 'c', 'd', 'e']
+ >>> ''.join(sorted(c.elements())) # list elements with repetitions
+ 'aaaaabbbbcccdde'
+ >>> sum(c.values()) # total of all counts
+ 15
+
+ >>> c['a'] # count of letter 'a'
+ 5
+ >>> for elem in 'shazam': # update counts from an iterable
+ ... c[elem] += 1 # by adding 1 to each element's count
+ >>> c['a'] # now there are seven 'a'
+ 7
+ >>> del c['b'] # remove all 'b'
+ >>> c['b'] # now there are zero 'b'
+ 0
+
+ >>> d = Counter('simsalabim') # make another counter
+ >>> c.update(d) # add in the second counter
+ >>> c['a'] # now there are nine 'a'
+ 9
+
+ >>> c.clear() # empty the counter
+ >>> c
+ Counter()
+
+ Note: If a count is set to zero or reduced to zero, it will remain
+ in the counter until the entry is deleted or the counter is cleared:
+
+ >>> c = Counter('aaabbc')
+ >>> c['b'] -= 2 # reduce the count of 'b' by two
+ >>> c.most_common() # 'b' is still in, but its count is zero
+ [('a', 3), ('c', 1), ('b', 0)]
+
+ '''
+ # References:
+ # http://en.wikipedia.org/wiki/Multiset
+ # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
+ # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
+ # http://code.activestate.com/recipes/259174/
+ # Knuth, TAOCP Vol. II section 4.6.3
+
+ def __init__(self, iterable=None, /, **kwds):
+ '''Create a new, empty Counter object. And if given, count elements
+ from an input iterable. Or, initialize the count from another mapping
+ of elements to their counts.
+
+ >>> c = Counter() # a new, empty counter
+ >>> c = Counter('gallahad') # a new counter from an iterable
+ >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
+ >>> c = Counter(a=4, b=2) # a new counter from keyword args
+
+ '''
+ super(Counter, self).__init__()
+ self.update(iterable, **kwds)
+
+ def __missing__(self, key):
+ 'The count of elements not in the Counter is zero.'
+ # Needed so that self[missing_item] does not raise KeyError
+ return 0
+
+ def most_common(self, n=None):
+ '''List the n most common elements and their counts from the most
+ common to the least. If n is None, then list all element counts.
+
+ >>> Counter('abracadabra').most_common(3)
+ [('a', 5), ('b', 2), ('r', 2)]
+
+ '''
+ # Emulate Bag.sortedByCount from Smalltalk
+ if n is None:
+ return sorted(self.items(), key=_itemgetter(1), reverse=True)
+ return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
+
+ def elements(self):
+ '''Iterator over elements repeating each as many times as its count.
+
+ >>> c = Counter('ABCABC')
+ >>> sorted(c.elements())
+ ['A', 'A', 'B', 'B', 'C', 'C']
+
+ # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
+ >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
+ >>> product = 1
+ >>> for factor in prime_factors.elements(): # loop over factors
+ ... product *= factor # and multiply them
+ >>> product
+ 1836
+
+ Note, if an element's count has been set to zero or is a negative
+ number, elements() will ignore it.
+
+ '''
+ # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
+ return _chain.from_iterable(_starmap(_repeat, self.items()))
+
+ # Override dict methods where necessary
+
+ @classmethod
+ def fromkeys(cls, iterable, v=None):
+ # There is no equivalent method for counters because the semantics
+ # would be ambiguous in cases such as Counter.fromkeys('aaabbc', v=2).
+ # Initializing counters to zero values isn't necessary because zero
+ # is already the default value for counter lookups. Initializing
+ # to one is easily accomplished with Counter(set(iterable)). For
+ # more exotic cases, create a dictionary first using a dictionary
+ # comprehension or dict.fromkeys().
+ raise NotImplementedError(
+ 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
+
+ def update(self, iterable=None, /, **kwds):
+ '''Like dict.update() but add counts instead of replacing them.
+
+ Source can be an iterable, a dictionary, or another Counter instance.
+
+ >>> c = Counter('which')
+ >>> c.update('witch') # add elements from another iterable
+ >>> d = Counter('watch')
+ >>> c.update(d) # add elements from another counter
+ >>> c['h'] # four 'h' in which, witch, and watch
+ 4
+
+ '''
+ # The regular dict.update() operation makes no sense here because the
+ # replace behavior results in the some of original untouched counts
+ # being mixed-in with all of the other counts for a mismash that
+ # doesn't have a straight-forward interpretation in most counting
+ # contexts. Instead, we implement straight-addition. Both the inputs
+ # and outputs are allowed to contain zero and negative counts.
+
+ if iterable is not None:
+ if isinstance(iterable, _collections_abc.Mapping):
+ if self:
+ self_get = self.get
+ for elem, count in iterable.items():
+ self[elem] = count + self_get(elem, 0)
+ else:
+ super(Counter, self).update(iterable) # fast path when counter is empty
+ else:
+ _count_elements(self, iterable)
+ if kwds:
+ self.update(kwds)
+
+ def subtract(self, iterable=None, /, **kwds):
+ '''Like dict.update() but subtracts counts instead of replacing them.
+ Counts can be reduced below zero. Both the inputs and outputs are
+ allowed to contain zero and negative counts.
+
+ Source can be an iterable, a dictionary, or another Counter instance.
+
+ >>> c = Counter('which')
+ >>> c.subtract('witch') # subtract elements from another iterable
+ >>> c.subtract(Counter('watch')) # subtract elements from another counter
+ >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
+ 0
+ >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
+ -1
+
+ '''
+ if iterable is not None:
+ self_get = self.get
+ if isinstance(iterable, _collections_abc.Mapping):
+ for elem, count in iterable.items():
+ self[elem] = self_get(elem, 0) - count
+ else:
+ for elem in iterable:
+ self[elem] = self_get(elem, 0) - 1
+ if kwds:
+ self.subtract(kwds)
+
+ def copy(self):
+ 'Return a shallow copy.'
+ return self.__class__(self)
+
+ def __reduce__(self):
+ return self.__class__, (dict(self),)
+
+ def __delitem__(self, elem):
+ 'Like dict.__delitem__() but does not raise KeyError for missing values.'
+ if elem in self:
+ super().__delitem__(elem)
+
+ def __repr__(self):
+ if not self:
+ return '%s()' % self.__class__.__name__
+ try:
+ items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
+ return '%s({%s})' % (self.__class__.__name__, items)
+ except TypeError:
+ # handle case where values are not orderable
+ return '{0}({1!r})'.format(self.__class__.__name__, dict(self))
+
+ # Multiset-style mathematical operations discussed in:
+ # Knuth TAOCP Volume II section 4.6.3 exercise 19
+ # and at http://en.wikipedia.org/wiki/Multiset
+ #
+ # Outputs guaranteed to only include positive counts.
+ #
+ # To strip negative and zero counts, add-in an empty counter:
+ # c += Counter()
+ #
+ # Rich comparison operators for multiset subset and superset tests
+ # are deliberately omitted due to semantic conflicts with the
+ # existing inherited dict equality method. Subset and superset
+ # semantics ignore zero counts and require that p≤q ∧ p≥q → p=q;
+ # however, that would not be the case for p=Counter(a=1, b=0)
+ # and q=Counter(a=1) where the dictionaries are not equal.
+
+ def __add__(self, other):
+ '''Add counts from two counters.
+
+ >>> Counter('abbb') + Counter('bcc')
+ Counter({'b': 4, 'c': 2, 'a': 1})
+
+ '''
+ if not isinstance(other, Counter):
+ return NotImplemented
+ result = Counter()
+ for elem, count in self.items():
+ newcount = count + other[elem]
+ if newcount > 0:
+ result[elem] = newcount
+ for elem, count in other.items():
+ if elem not in self and count > 0:
+ result[elem] = count
+ return result
+
+ def __sub__(self, other):
+ ''' Subtract count, but keep only results with positive counts.
+
+ >>> Counter('abbbc') - Counter('bccd')
+ Counter({'b': 2, 'a': 1})
+
+ '''
+ if not isinstance(other, Counter):
+ return NotImplemented
+ result = Counter()
+ for elem, count in self.items():
+ newcount = count - other[elem]
+ if newcount > 0:
+ result[elem] = newcount
+ for elem, count in other.items():
+ if elem not in self and count < 0:
+ result[elem] = 0 - count
+ return result
+
+ def __or__(self, other):
+ '''Union is the maximum of value in either of the input counters.
+
+ >>> Counter('abbb') | Counter('bcc')
+ Counter({'b': 3, 'c': 2, 'a': 1})
+
+ '''
+ if not isinstance(other, Counter):
+ return NotImplemented
+ result = Counter()
+ for elem, count in self.items():
+ other_count = other[elem]
+ newcount = other_count if count < other_count else count
+ if newcount > 0:
+ result[elem] = newcount
+ for elem, count in other.items():
+ if elem not in self and count > 0:
+ result[elem] = count
+ return result
+
+ def __and__(self, other):
+ ''' Intersection is the minimum of corresponding counts.
+
+ >>> Counter('abbb') & Counter('bcc')
+ Counter({'b': 1})
+
+ '''
+ if not isinstance(other, Counter):
+ return NotImplemented
+ result = Counter()
+ for elem, count in self.items():
+ other_count = other[elem]
+ newcount = count if count < other_count else other_count
+ if newcount > 0:
+ result[elem] = newcount
+ return result
+
+ def __pos__(self):
+ 'Adds an empty counter, effectively stripping negative and zero counts'
+ result = Counter()
+ for elem, count in self.items():
+ if count > 0:
+ result[elem] = count
+ return result
+
+ def __neg__(self):
+ '''Subtracts from an empty counter. Strips positive and zero counts,
+ and flips the sign on negative counts.
+
+ '''
+ result = Counter()
+ for elem, count in self.items():
+ if count < 0:
+ result[elem] = 0 - count
+ return result
+
+ def _keep_positive(self):
+ '''Internal method to strip elements with a negative or zero count'''
+ nonpositive = [elem for elem, count in self.items() if not count > 0]
+ for elem in nonpositive:
+ del self[elem]
+ return self
+
+ def __iadd__(self, other):
+ '''Inplace add from another counter, keeping only positive counts.
+
+ >>> c = Counter('abbb')
+ >>> c += Counter('bcc')
+ >>> c
+ Counter({'b': 4, 'c': 2, 'a': 1})
+
+ '''
+ for elem, count in other.items():
+ self[elem] += count
+ return self._keep_positive()
+
+ def __isub__(self, other):
+ '''Inplace subtract counter, but keep only results with positive counts.
+
+ >>> c = Counter('abbbc')
+ >>> c -= Counter('bccd')
+ >>> c
+ Counter({'b': 2, 'a': 1})
+
+ '''
+ for elem, count in other.items():
+ self[elem] -= count
+ return self._keep_positive()
+
+ def __ior__(self, other):
+ '''Inplace union is the maximum of value from either counter.
+
+ >>> c = Counter('abbb')
+ >>> c |= Counter('bcc')
+ >>> c
+ Counter({'b': 3, 'c': 2, 'a': 1})
+
+ '''
+ for elem, other_count in other.items():
+ count = self[elem]
+ if other_count > count:
+ self[elem] = other_count
+ return self._keep_positive()
+
+ def __iand__(self, other):
+ '''Inplace intersection is the minimum of corresponding counts.
+
+ >>> c = Counter('abbb')
+ >>> c &= Counter('bcc')
+ >>> c
+ Counter({'b': 1})
+
+ '''
+ for elem, count in self.items():
+ other_count = other[elem]
+ if other_count < count:
+ self[elem] = other_count
+ return self._keep_positive()
+
+
+########################################################################
+### ChainMap
+########################################################################
+
+class ChainMap(_collections_abc.MutableMapping):
+ ''' A ChainMap groups multiple dicts (or other mappings) together
+ to create a single, updateable view.
+
+ The underlying mappings are stored in a list. That list is public and can
+ be accessed or updated using the *maps* attribute. There is no other
+ state.
+
+ Lookups search the underlying mappings successively until a key is found.
+ In contrast, writes, updates, and deletions only operate on the first
+ mapping.
+
+ '''
+
+ def __init__(self, *maps):
+ '''Initialize a ChainMap by setting *maps* to the given mappings.
+ If no mappings are provided, a single empty dictionary is used.
+
+ '''
+ self.maps = list(maps) or [{}] # always at least one map
+
+ def __missing__(self, key):
+ raise KeyError(key)
+
+ def __getitem__(self, key):
+ for mapping in self.maps:
+ try:
+ return mapping[key] # can't use 'key in mapping' with defaultdict
+ except KeyError:
+ pass
+ return self.__missing__(key) # support subclasses that define __missing__
+
+ def get(self, key, default=None):
+ return self[key] if key in self else default
+
+ def __len__(self):
+ return len(set().union(*self.maps)) # reuses stored hash values if possible
+
+ def __iter__(self):
+ d = {}
+ for mapping in reversed(self.maps):
+ d.update(mapping) # reuses stored hash values if possible
+ return iter(d)
+
+ def __contains__(self, key):
+ return any(key in m for m in self.maps)
+
+ def __bool__(self):
+ return any(self.maps)
+
+ @_recursive_repr()
+ def __repr__(self):
+ return f'{self.__class__.__name__}({", ".join(map(repr, self.maps))})'
+
+ @classmethod
+ def fromkeys(cls, iterable, *args):
+ 'Create a ChainMap with a single dict created from the iterable.'
+ return cls(dict.fromkeys(iterable, *args))
+
+ def copy(self):
+ 'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]'
+ return self.__class__(self.maps[0].copy(), *self.maps[1:])
+
+ __copy__ = copy
+
+ def new_child(self, m=None): # like Django's Context.push()
+ '''New ChainMap with a new map followed by all previous maps.
+ If no map is provided, an empty dict is used.
+ '''
+ if m is None:
+ m = {}
+ return self.__class__(m, *self.maps)
+
+ @property
+ def parents(self): # like Django's Context.pop()
+ 'New ChainMap from maps[1:].'
+ return self.__class__(*self.maps[1:])
+
+ def __setitem__(self, key, value):
+ self.maps[0][key] = value
+
+ def __delitem__(self, key):
+ try:
+ del self.maps[0][key]
+ except KeyError:
+ raise KeyError('Key not found in the first mapping: {!r}'.format(key))
+
+ def popitem(self):
+ 'Remove and return an item pair from maps[0]. Raise KeyError is maps[0] is empty.'
+ try:
+ return self.maps[0].popitem()
+ except KeyError:
+ raise KeyError('No keys found in the first mapping.')
+
+ def pop(self, key, *args):
+ 'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].'
+ try:
+ return self.maps[0].pop(key, *args)
+ except KeyError:
+ raise KeyError('Key not found in the first mapping: {!r}'.format(key))
+
+ def clear(self):
+ 'Clear maps[0], leaving maps[1:] intact.'
+ self.maps[0].clear()
+
+
+################################################################################
+### UserDict
+################################################################################
+
+class UserDict(_collections_abc.MutableMapping):
+
+ # Start by filling-out the abstract methods
+ def __init__(*args, **kwargs):
+ if not args:
+ raise TypeError("descriptor '__init__' of 'UserDict' object "
+ "needs an argument")
+ self, *args = args
+ if len(args) > 1:
+ raise TypeError('expected at most 1 arguments, got %d' % len(args))
+ if args:
+ dict = args[0]
+ elif 'dict' in kwargs:
+ dict = kwargs.pop('dict')
+ import warnings
+ warnings.warn("Passing 'dict' as keyword argument is deprecated",
+ DeprecationWarning, stacklevel=2)
+ else:
+ dict = None
+ self.data = {}
+ if dict is not None:
+ self.update(dict)
+ if kwargs:
+ self.update(kwargs)
+ __init__.__text_signature__ = '($self, dict=None, /, **kwargs)'
+
+ def __len__(self): return len(self.data)
+ def __getitem__(self, key):
+ if key in self.data:
+ return self.data[key]
+ if hasattr(self.__class__, "__missing__"):
+ return self.__class__.__missing__(self, key)
+ raise KeyError(key)
+ def __setitem__(self, key, item): self.data[key] = item
+ def __delitem__(self, key): del self.data[key]
+ def __iter__(self):
+ return iter(self.data)
+
+ # Modify __contains__ to work correctly when __missing__ is present
+ def __contains__(self, key):
+ return key in self.data
+
+ # Now, add the methods in dicts but not in MutableMapping
+ def __repr__(self): return repr(self.data)
+ def __copy__(self):
+ inst = self.__class__.__new__(self.__class__)
+ inst.__dict__.update(self.__dict__)
+ # Create a copy and avoid triggering descriptors
+ inst.__dict__["data"] = self.__dict__["data"].copy()
+ return inst
+
+ def copy(self):
+ if self.__class__ is UserDict:
+ return UserDict(self.data.copy())
+ import copy
+ data = self.data
+ try:
+ self.data = {}
+ c = copy.copy(self)
+ finally:
+ self.data = data
+ c.update(self)
+ return c
+
+ @classmethod
+ def fromkeys(cls, iterable, value=None):
+ d = cls()
+ for key in iterable:
+ d[key] = value
+ return d
+
+
+
+################################################################################
+### UserList
+################################################################################
+
+class UserList(_collections_abc.MutableSequence):
+ """A more or less complete user-defined wrapper around list objects."""
+ def __init__(self, initlist=None):
+ self.data = []
+ if initlist is not None:
+ # XXX should this accept an arbitrary sequence?
+ if type(initlist) == type(self.data):
+ self.data[:] = initlist
+ elif isinstance(initlist, UserList):
+ self.data[:] = initlist.data[:]
+ else:
+ self.data = list(initlist)
+ def __repr__(self): return repr(self.data)
+ def __lt__(self, other): return self.data < self.__cast(other)
+ def __le__(self, other): return self.data <= self.__cast(other)
+ def __eq__(self, other): return self.data == self.__cast(other)
+ def __gt__(self, other): return self.data > self.__cast(other)
+ def __ge__(self, other): return self.data >= self.__cast(other)
+ def __cast(self, other):
+ return other.data if isinstance(other, UserList) else other
+ def __contains__(self, item): return item in self.data
+ def __len__(self): return len(self.data)
+ def __getitem__(self, i):
+ if isinstance(i, slice):
+ return self.__class__(self.data[i])
+ else:
+ return self.data[i]
+ def __setitem__(self, i, item): self.data[i] = item
+ def __delitem__(self, i): del self.data[i]
+ def __add__(self, other):
+ if isinstance(other, UserList):
+ return self.__class__(self.data + other.data)
+ elif isinstance(other, type(self.data)):
+ return self.__class__(self.data + other)
+ return self.__class__(self.data + list(other))
+ def __radd__(self, other):
+ if isinstance(other, UserList):
+ return self.__class__(other.data + self.data)
+ elif isinstance(other, type(self.data)):
+ return self.__class__(other + self.data)
+ return self.__class__(list(other) + self.data)
+ def __iadd__(self, other):
+ if isinstance(other, UserList):
+ self.data += other.data
+ elif isinstance(other, type(self.data)):
+ self.data += other
+ else:
+ self.data += list(other)
+ return self
+ def __mul__(self, n):
+ return self.__class__(self.data*n)
+ __rmul__ = __mul__
+ def __imul__(self, n):
+ self.data *= n
+ return self
+ def __copy__(self):
+ inst = self.__class__.__new__(self.__class__)
+ inst.__dict__.update(self.__dict__)
+ # Create a copy and avoid triggering descriptors
+ inst.__dict__["data"] = self.__dict__["data"][:]
+ return inst
+ def append(self, item): self.data.append(item)
+ def insert(self, i, item): self.data.insert(i, item)
+ def pop(self, i=-1): return self.data.pop(i)
+ def remove(self, item): self.data.remove(item)
+ def clear(self): self.data.clear()
+ def copy(self): return self.__class__(self)
+ def count(self, item): return self.data.count(item)
+ def index(self, item, *args): return self.data.index(item, *args)
+ def reverse(self): self.data.reverse()
+ def sort(self, /, *args, **kwds): self.data.sort(*args, **kwds)
+ def extend(self, other):
+ if isinstance(other, UserList):
+ self.data.extend(other.data)
+ else:
+ self.data.extend(other)
+
+
+
+################################################################################
+### UserString
+################################################################################
+
+class UserString(_collections_abc.Sequence):
+ def __init__(self, seq):
+ if isinstance(seq, str):
+ self.data = seq
+ elif isinstance(seq, UserString):
+ self.data = seq.data[:]
+ else:
+ self.data = str(seq)
+ def __str__(self): return str(self.data)
+ def __repr__(self): return repr(self.data)
+ def __int__(self): return int(self.data)
+ def __float__(self): return float(self.data)
+ def __complex__(self): return complex(self.data)
+ def __hash__(self): return hash(self.data)
+ def __getnewargs__(self):
+ return (self.data[:],)
+
+ def __eq__(self, string):
+ if isinstance(string, UserString):
+ return self.data == string.data
+ return self.data == string
+ def __lt__(self, string):
+ if isinstance(string, UserString):
+ return self.data < string.data
+ return self.data < string
+ def __le__(self, string):
+ if isinstance(string, UserString):
+ return self.data <= string.data
+ return self.data <= string
+ def __gt__(self, string):
+ if isinstance(string, UserString):
+ return self.data > string.data
+ return self.data > string
+ def __ge__(self, string):
+ if isinstance(string, UserString):
+ return self.data >= string.data
+ return self.data >= string
+
+ def __contains__(self, char):
+ if isinstance(char, UserString):
+ char = char.data
+ return char in self.data
+
+ def __len__(self): return len(self.data)
+ def __getitem__(self, index): return self.__class__(self.data[index])
+ def __add__(self, other):
+ if isinstance(other, UserString):
+ return self.__class__(self.data + other.data)
+ elif isinstance(other, str):
+ return self.__class__(self.data + other)
+ return self.__class__(self.data + str(other))
+ def __radd__(self, other):
+ if isinstance(other, str):
+ return self.__class__(other + self.data)
+ return self.__class__(str(other) + self.data)
+ def __mul__(self, n):
+ return self.__class__(self.data*n)
+ __rmul__ = __mul__
+ def __mod__(self, args):
+ return self.__class__(self.data % args)
+ def __rmod__(self, template):
+ return self.__class__(str(template) % self)
+ # the following methods are defined in alphabetical order:
+ def capitalize(self): return self.__class__(self.data.capitalize())
+ def casefold(self):
+ return self.__class__(self.data.casefold())
+ def center(self, width, *args):
+ return self.__class__(self.data.center(width, *args))
+ def count(self, sub, start=0, end=_sys.maxsize):
+ if isinstance(sub, UserString):
+ sub = sub.data
+ return self.data.count(sub, start, end)
+ def encode(self, encoding='utf-8', errors='strict'):
+ encoding = 'utf-8' if encoding is None else encoding
+ errors = 'strict' if errors is None else errors
+ return self.data.encode(encoding, errors)
+ def endswith(self, suffix, start=0, end=_sys.maxsize):
+ return self.data.endswith(suffix, start, end)
+ def expandtabs(self, tabsize=8):
+ return self.__class__(self.data.expandtabs(tabsize))
+ def find(self, sub, start=0, end=_sys.maxsize):
+ if isinstance(sub, UserString):
+ sub = sub.data
+ return self.data.find(sub, start, end)
+ def format(self, /, *args, **kwds):
+ return self.data.format(*args, **kwds)
+ def format_map(self, mapping):
+ return self.data.format_map(mapping)
+ def index(self, sub, start=0, end=_sys.maxsize):
+ return self.data.index(sub, start, end)
+ def isalpha(self): return self.data.isalpha()
+ def isalnum(self): return self.data.isalnum()
+ def isascii(self): return self.data.isascii()
+ def isdecimal(self): return self.data.isdecimal()
+ def isdigit(self): return self.data.isdigit()
+ def isidentifier(self): return self.data.isidentifier()
+ def islower(self): return self.data.islower()
+ def isnumeric(self): return self.data.isnumeric()
+ def isprintable(self): return self.data.isprintable()
+ def isspace(self): return self.data.isspace()
+ def istitle(self): return self.data.istitle()
+ def isupper(self): return self.data.isupper()
+ def join(self, seq): return self.data.join(seq)
+ def ljust(self, width, *args):
+ return self.__class__(self.data.ljust(width, *args))
+ def lower(self): return self.__class__(self.data.lower())
+ def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
+ maketrans = str.maketrans
+ def partition(self, sep):
+ return self.data.partition(sep)
+ def replace(self, old, new, maxsplit=-1):
+ if isinstance(old, UserString):
+ old = old.data
+ if isinstance(new, UserString):
+ new = new.data
+ return self.__class__(self.data.replace(old, new, maxsplit))
+ def rfind(self, sub, start=0, end=_sys.maxsize):
+ if isinstance(sub, UserString):
+ sub = sub.data
+ return self.data.rfind(sub, start, end)
+ def rindex(self, sub, start=0, end=_sys.maxsize):
+ return self.data.rindex(sub, start, end)
+ def rjust(self, width, *args):
+ return self.__class__(self.data.rjust(width, *args))
+ def rpartition(self, sep):
+ return self.data.rpartition(sep)
+ def rstrip(self, chars=None):
+ return self.__class__(self.data.rstrip(chars))
+ def split(self, sep=None, maxsplit=-1):
+ return self.data.split(sep, maxsplit)
+ def rsplit(self, sep=None, maxsplit=-1):
+ return self.data.rsplit(sep, maxsplit)
+ def splitlines(self, keepends=False): return self.data.splitlines(keepends)
+ def startswith(self, prefix, start=0, end=_sys.maxsize):
+ return self.data.startswith(prefix, start, end)
+ def strip(self, chars=None): return self.__class__(self.data.strip(chars))
+ def swapcase(self): return self.__class__(self.data.swapcase())
+ def title(self): return self.__class__(self.data.title())
+ def translate(self, *args):
+ return self.__class__(self.data.translate(*args))
+ def upper(self): return self.__class__(self.data.upper())
+ def zfill(self, width): return self.__class__(self.data.zfill(width))
diff --git a/dist/lib/collections/__pycache__/__init__.cpython-38.opt-1.pyc b/dist/lib/collections/__pycache__/__init__.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..d1be6c9
Binary files /dev/null and b/dist/lib/collections/__pycache__/__init__.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/collections/__pycache__/abc.cpython-38.opt-1.pyc b/dist/lib/collections/__pycache__/abc.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..65996c4
Binary files /dev/null and b/dist/lib/collections/__pycache__/abc.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/collections/abc.py b/dist/lib/collections/abc.py
new file mode 100644
index 0000000..891600d
--- /dev/null
+++ b/dist/lib/collections/abc.py
@@ -0,0 +1,2 @@
+from _collections_abc import *
+from _collections_abc import __all__
diff --git a/dist/lib/colorsys.py b/dist/lib/colorsys.py
new file mode 100644
index 0000000..b93e384
--- /dev/null
+++ b/dist/lib/colorsys.py
@@ -0,0 +1,164 @@
+"""Conversion functions between RGB and other color systems.
+
+This modules provides two functions for each color system ABC:
+
+ rgb_to_abc(r, g, b) --> a, b, c
+ abc_to_rgb(a, b, c) --> r, g, b
+
+All inputs and outputs are triples of floats in the range [0.0...1.0]
+(with the exception of I and Q, which covers a slightly larger range).
+Inputs outside the valid range may cause exceptions or invalid outputs.
+
+Supported color systems:
+RGB: Red, Green, Blue components
+YIQ: Luminance, Chrominance (used by composite video signals)
+HLS: Hue, Luminance, Saturation
+HSV: Hue, Saturation, Value
+"""
+
+# References:
+# http://en.wikipedia.org/wiki/YIQ
+# http://en.wikipedia.org/wiki/HLS_color_space
+# http://en.wikipedia.org/wiki/HSV_color_space
+
+__all__ = ["rgb_to_yiq","yiq_to_rgb","rgb_to_hls","hls_to_rgb",
+ "rgb_to_hsv","hsv_to_rgb"]
+
+# Some floating point constants
+
+ONE_THIRD = 1.0/3.0
+ONE_SIXTH = 1.0/6.0
+TWO_THIRD = 2.0/3.0
+
+# YIQ: used by composite video signals (linear combinations of RGB)
+# Y: perceived grey level (0.0 == black, 1.0 == white)
+# I, Q: color components
+#
+# There are a great many versions of the constants used in these formulae.
+# The ones in this library uses constants from the FCC version of NTSC.
+
+def rgb_to_yiq(r, g, b):
+ y = 0.30*r + 0.59*g + 0.11*b
+ i = 0.74*(r-y) - 0.27*(b-y)
+ q = 0.48*(r-y) + 0.41*(b-y)
+ return (y, i, q)
+
+def yiq_to_rgb(y, i, q):
+ # r = y + (0.27*q + 0.41*i) / (0.74*0.41 + 0.27*0.48)
+ # b = y + (0.74*q - 0.48*i) / (0.74*0.41 + 0.27*0.48)
+ # g = y - (0.30*(r-y) + 0.11*(b-y)) / 0.59
+
+ r = y + 0.9468822170900693*i + 0.6235565819861433*q
+ g = y - 0.27478764629897834*i - 0.6356910791873801*q
+ b = y - 1.1085450346420322*i + 1.7090069284064666*q
+
+ if r < 0.0:
+ r = 0.0
+ if g < 0.0:
+ g = 0.0
+ if b < 0.0:
+ b = 0.0
+ if r > 1.0:
+ r = 1.0
+ if g > 1.0:
+ g = 1.0
+ if b > 1.0:
+ b = 1.0
+ return (r, g, b)
+
+
+# HLS: Hue, Luminance, Saturation
+# H: position in the spectrum
+# L: color lightness
+# S: color saturation
+
+def rgb_to_hls(r, g, b):
+ maxc = max(r, g, b)
+ minc = min(r, g, b)
+ # XXX Can optimize (maxc+minc) and (maxc-minc)
+ l = (minc+maxc)/2.0
+ if minc == maxc:
+ return 0.0, l, 0.0
+ if l <= 0.5:
+ s = (maxc-minc) / (maxc+minc)
+ else:
+ s = (maxc-minc) / (2.0-maxc-minc)
+ rc = (maxc-r) / (maxc-minc)
+ gc = (maxc-g) / (maxc-minc)
+ bc = (maxc-b) / (maxc-minc)
+ if r == maxc:
+ h = bc-gc
+ elif g == maxc:
+ h = 2.0+rc-bc
+ else:
+ h = 4.0+gc-rc
+ h = (h/6.0) % 1.0
+ return h, l, s
+
+def hls_to_rgb(h, l, s):
+ if s == 0.0:
+ return l, l, l
+ if l <= 0.5:
+ m2 = l * (1.0+s)
+ else:
+ m2 = l+s-(l*s)
+ m1 = 2.0*l - m2
+ return (_v(m1, m2, h+ONE_THIRD), _v(m1, m2, h), _v(m1, m2, h-ONE_THIRD))
+
+def _v(m1, m2, hue):
+ hue = hue % 1.0
+ if hue < ONE_SIXTH:
+ return m1 + (m2-m1)*hue*6.0
+ if hue < 0.5:
+ return m2
+ if hue < TWO_THIRD:
+ return m1 + (m2-m1)*(TWO_THIRD-hue)*6.0
+ return m1
+
+
+# HSV: Hue, Saturation, Value
+# H: position in the spectrum
+# S: color saturation ("purity")
+# V: color brightness
+
+def rgb_to_hsv(r, g, b):
+ maxc = max(r, g, b)
+ minc = min(r, g, b)
+ v = maxc
+ if minc == maxc:
+ return 0.0, 0.0, v
+ s = (maxc-minc) / maxc
+ rc = (maxc-r) / (maxc-minc)
+ gc = (maxc-g) / (maxc-minc)
+ bc = (maxc-b) / (maxc-minc)
+ if r == maxc:
+ h = bc-gc
+ elif g == maxc:
+ h = 2.0+rc-bc
+ else:
+ h = 4.0+gc-rc
+ h = (h/6.0) % 1.0
+ return h, s, v
+
+def hsv_to_rgb(h, s, v):
+ if s == 0.0:
+ return v, v, v
+ i = int(h*6.0) # XXX assume int() truncates!
+ f = (h*6.0) - i
+ p = v*(1.0 - s)
+ q = v*(1.0 - s*f)
+ t = v*(1.0 - s*(1.0-f))
+ i = i%6
+ if i == 0:
+ return v, t, p
+ if i == 1:
+ return q, v, p
+ if i == 2:
+ return p, v, t
+ if i == 3:
+ return p, q, v
+ if i == 4:
+ return t, p, v
+ if i == 5:
+ return v, p, q
+ # Cannot get here
diff --git a/dist/lib/compileall.py b/dist/lib/compileall.py
new file mode 100644
index 0000000..bfac8ef
--- /dev/null
+++ b/dist/lib/compileall.py
@@ -0,0 +1,333 @@
+"""Module/script to byte-compile all .py files to .pyc files.
+
+When called as a script with arguments, this compiles the directories
+given as arguments recursively; the -l option prevents it from
+recursing into directories.
+
+Without arguments, if compiles all modules on sys.path, without
+recursing into subdirectories. (Even though it should do so for
+packages -- for now, you'll have to deal with packages separately.)
+
+See module py_compile for details of the actual byte-compilation.
+"""
+import os
+import sys
+import importlib.util
+import py_compile
+import struct
+
+from functools import partial
+
+__all__ = ["compile_dir","compile_file","compile_path"]
+
+def _walk_dir(dir, ddir=None, maxlevels=10, quiet=0):
+ if quiet < 2 and isinstance(dir, os.PathLike):
+ dir = os.fspath(dir)
+ if not quiet:
+ print('Listing {!r}...'.format(dir))
+ try:
+ names = os.listdir(dir)
+ except OSError:
+ if quiet < 2:
+ print("Can't list {!r}".format(dir))
+ names = []
+ names.sort()
+ for name in names:
+ if name == '__pycache__':
+ continue
+ fullname = os.path.join(dir, name)
+ if ddir is not None:
+ dfile = os.path.join(ddir, name)
+ else:
+ dfile = None
+ if not os.path.isdir(fullname):
+ yield fullname, ddir
+ elif (maxlevels > 0 and name != os.curdir and name != os.pardir and
+ os.path.isdir(fullname) and not os.path.islink(fullname)):
+ yield from _walk_dir(fullname, ddir=dfile,
+ maxlevels=maxlevels - 1, quiet=quiet)
+
+def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
+ quiet=0, legacy=False, optimize=-1, workers=1,
+ invalidation_mode=None):
+ """Byte-compile all modules in the given directory tree.
+
+ Arguments (only dir is required):
+
+ dir: the directory to byte-compile
+ maxlevels: maximum recursion level (default 10)
+ ddir: the directory that will be prepended to the path to the
+ file as it is compiled into each byte-code file.
+ force: if True, force compilation, even if timestamps are up-to-date
+ quiet: full output with False or 0, errors only with 1,
+ no output with 2
+ legacy: if True, produce legacy pyc paths instead of PEP 3147 paths
+ optimize: optimization level or -1 for level of the interpreter
+ workers: maximum number of parallel workers
+ invalidation_mode: how the up-to-dateness of the pyc will be checked
+ """
+ ProcessPoolExecutor = None
+ if workers < 0:
+ raise ValueError('workers must be greater or equal to 0')
+ if workers != 1:
+ try:
+ # Only import when needed, as low resource platforms may
+ # fail to import it
+ from concurrent.futures import ProcessPoolExecutor
+ except ImportError:
+ workers = 1
+ files_and_ddirs = _walk_dir(dir, quiet=quiet, maxlevels=maxlevels,
+ ddir=ddir)
+ success = True
+ if workers != 1 and ProcessPoolExecutor is not None:
+ # If workers == 0, let ProcessPoolExecutor choose
+ workers = workers or None
+ with ProcessPoolExecutor(max_workers=workers) as executor:
+ results = executor.map(
+ partial(_compile_file_tuple,
+ force=force, rx=rx, quiet=quiet,
+ legacy=legacy, optimize=optimize,
+ invalidation_mode=invalidation_mode,
+ ),
+ files_and_ddirs)
+ success = min(results, default=True)
+ else:
+ for file, dfile in files_and_ddirs:
+ if not compile_file(file, dfile, force, rx, quiet,
+ legacy, optimize, invalidation_mode):
+ success = False
+ return success
+
+def _compile_file_tuple(file_and_dfile, **kwargs):
+ """Needs to be toplevel for ProcessPoolExecutor."""
+ file, dfile = file_and_dfile
+ return compile_file(file, dfile, **kwargs)
+
+def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
+ legacy=False, optimize=-1,
+ invalidation_mode=None):
+ """Byte-compile one file.
+
+ Arguments (only fullname is required):
+
+ fullname: the file to byte-compile
+ ddir: if given, the directory name compiled in to the
+ byte-code file.
+ force: if True, force compilation, even if timestamps are up-to-date
+ quiet: full output with False or 0, errors only with 1,
+ no output with 2
+ legacy: if True, produce legacy pyc paths instead of PEP 3147 paths
+ optimize: optimization level or -1 for level of the interpreter
+ invalidation_mode: how the up-to-dateness of the pyc will be checked
+ """
+ success = True
+ if quiet < 2 and isinstance(fullname, os.PathLike):
+ fullname = os.fspath(fullname)
+ name = os.path.basename(fullname)
+ if ddir is not None:
+ dfile = os.path.join(ddir, name)
+ else:
+ dfile = None
+ if rx is not None:
+ mo = rx.search(fullname)
+ if mo:
+ return success
+ if os.path.isfile(fullname):
+ if legacy:
+ cfile = fullname + 'c'
+ else:
+ if optimize >= 0:
+ opt = optimize if optimize >= 1 else ''
+ cfile = importlib.util.cache_from_source(
+ fullname, optimization=opt)
+ else:
+ cfile = importlib.util.cache_from_source(fullname)
+ cache_dir = os.path.dirname(cfile)
+ head, tail = name[:-3], name[-3:]
+ if tail == '.py':
+ if not force:
+ try:
+ mtime = int(os.stat(fullname).st_mtime)
+ expect = struct.pack('<4sll', importlib.util.MAGIC_NUMBER,
+ 0, mtime)
+ with open(cfile, 'rb') as chandle:
+ actual = chandle.read(12)
+ if expect == actual:
+ return success
+ except OSError:
+ pass
+ if not quiet:
+ print('Compiling {!r}...'.format(fullname))
+ try:
+ ok = py_compile.compile(fullname, cfile, dfile, True,
+ optimize=optimize,
+ invalidation_mode=invalidation_mode)
+ except py_compile.PyCompileError as err:
+ success = False
+ if quiet >= 2:
+ return success
+ elif quiet:
+ print('*** Error compiling {!r}...'.format(fullname))
+ else:
+ print('*** ', end='')
+ # escape non-printable characters in msg
+ msg = err.msg.encode(sys.stdout.encoding,
+ errors='backslashreplace')
+ msg = msg.decode(sys.stdout.encoding)
+ print(msg)
+ except (SyntaxError, UnicodeError, OSError) as e:
+ success = False
+ if quiet >= 2:
+ return success
+ elif quiet:
+ print('*** Error compiling {!r}...'.format(fullname))
+ else:
+ print('*** ', end='')
+ print(e.__class__.__name__ + ':', e)
+ else:
+ if ok == 0:
+ success = False
+ return success
+
+def compile_path(skip_curdir=1, maxlevels=0, force=False, quiet=0,
+ legacy=False, optimize=-1,
+ invalidation_mode=None):
+ """Byte-compile all module on sys.path.
+
+ Arguments (all optional):
+
+ skip_curdir: if true, skip current directory (default True)
+ maxlevels: max recursion level (default 0)
+ force: as for compile_dir() (default False)
+ quiet: as for compile_dir() (default 0)
+ legacy: as for compile_dir() (default False)
+ optimize: as for compile_dir() (default -1)
+ invalidation_mode: as for compiler_dir()
+ """
+ success = True
+ for dir in sys.path:
+ if (not dir or dir == os.curdir) and skip_curdir:
+ if quiet < 2:
+ print('Skipping current directory')
+ else:
+ success = success and compile_dir(
+ dir,
+ maxlevels,
+ None,
+ force,
+ quiet=quiet,
+ legacy=legacy,
+ optimize=optimize,
+ invalidation_mode=invalidation_mode,
+ )
+ return success
+
+
+def main():
+ """Script main program."""
+ import argparse
+
+ parser = argparse.ArgumentParser(
+ description='Utilities to support installing Python libraries.')
+ parser.add_argument('-l', action='store_const', const=0,
+ default=10, dest='maxlevels',
+ help="don't recurse into subdirectories")
+ parser.add_argument('-r', type=int, dest='recursion',
+ help=('control the maximum recursion level. '
+ 'if `-l` and `-r` options are specified, '
+ 'then `-r` takes precedence.'))
+ parser.add_argument('-f', action='store_true', dest='force',
+ help='force rebuild even if timestamps are up to date')
+ parser.add_argument('-q', action='count', dest='quiet', default=0,
+ help='output only error messages; -qq will suppress '
+ 'the error messages as well.')
+ parser.add_argument('-b', action='store_true', dest='legacy',
+ help='use legacy (pre-PEP3147) compiled file locations')
+ parser.add_argument('-d', metavar='DESTDIR', dest='ddir', default=None,
+ help=('directory to prepend to file paths for use in '
+ 'compile-time tracebacks and in runtime '
+ 'tracebacks in cases where the source file is '
+ 'unavailable'))
+ parser.add_argument('-x', metavar='REGEXP', dest='rx', default=None,
+ help=('skip files matching the regular expression; '
+ 'the regexp is searched for in the full path '
+ 'of each file considered for compilation'))
+ parser.add_argument('-i', metavar='FILE', dest='flist',
+ help=('add all the files and directories listed in '
+ 'FILE to the list considered for compilation; '
+ 'if "-", names are read from stdin'))
+ parser.add_argument('compile_dest', metavar='FILE|DIR', nargs='*',
+ help=('zero or more file and directory names '
+ 'to compile; if no arguments given, defaults '
+ 'to the equivalent of -l sys.path'))
+ parser.add_argument('-j', '--workers', default=1,
+ type=int, help='Run compileall concurrently')
+ invalidation_modes = [mode.name.lower().replace('_', '-')
+ for mode in py_compile.PycInvalidationMode]
+ parser.add_argument('--invalidation-mode',
+ choices=sorted(invalidation_modes),
+ help=('set .pyc invalidation mode; defaults to '
+ '"checked-hash" if the SOURCE_DATE_EPOCH '
+ 'environment variable is set, and '
+ '"timestamp" otherwise.'))
+
+ args = parser.parse_args()
+ compile_dests = args.compile_dest
+
+ if args.rx:
+ import re
+ args.rx = re.compile(args.rx)
+
+
+ if args.recursion is not None:
+ maxlevels = args.recursion
+ else:
+ maxlevels = args.maxlevels
+
+ # if flist is provided then load it
+ if args.flist:
+ try:
+ with (sys.stdin if args.flist=='-' else open(args.flist)) as f:
+ for line in f:
+ compile_dests.append(line.strip())
+ except OSError:
+ if args.quiet < 2:
+ print("Error reading file list {}".format(args.flist))
+ return False
+
+ if args.invalidation_mode:
+ ivl_mode = args.invalidation_mode.replace('-', '_').upper()
+ invalidation_mode = py_compile.PycInvalidationMode[ivl_mode]
+ else:
+ invalidation_mode = None
+
+ success = True
+ try:
+ if compile_dests:
+ for dest in compile_dests:
+ if os.path.isfile(dest):
+ if not compile_file(dest, args.ddir, args.force, args.rx,
+ args.quiet, args.legacy,
+ invalidation_mode=invalidation_mode):
+ success = False
+ else:
+ if not compile_dir(dest, maxlevels, args.ddir,
+ args.force, args.rx, args.quiet,
+ args.legacy, workers=args.workers,
+ invalidation_mode=invalidation_mode):
+ success = False
+ return success
+ else:
+ return compile_path(legacy=args.legacy, force=args.force,
+ quiet=args.quiet,
+ invalidation_mode=invalidation_mode)
+ except KeyboardInterrupt:
+ if args.quiet < 2:
+ print("\n[interrupted]")
+ return False
+ return True
+
+
+if __name__ == '__main__':
+ exit_status = int(not main())
+ sys.exit(exit_status)
diff --git a/dist/lib/concurrent/__init__.py b/dist/lib/concurrent/__init__.py
new file mode 100644
index 0000000..196d378
--- /dev/null
+++ b/dist/lib/concurrent/__init__.py
@@ -0,0 +1 @@
+# This directory is a Python package.
diff --git a/dist/lib/concurrent/__pycache__/__init__.cpython-38.opt-1.pyc b/dist/lib/concurrent/__pycache__/__init__.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..cb37ba0
Binary files /dev/null and b/dist/lib/concurrent/__pycache__/__init__.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/concurrent/futures/__init__.py b/dist/lib/concurrent/futures/__init__.py
new file mode 100644
index 0000000..d746aea
--- /dev/null
+++ b/dist/lib/concurrent/futures/__init__.py
@@ -0,0 +1,53 @@
+# Copyright 2009 Brian Quinlan. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Execute computations asynchronously using threads or processes."""
+
+__author__ = 'Brian Quinlan (brian@sweetapp.com)'
+
+from concurrent.futures._base import (FIRST_COMPLETED,
+ FIRST_EXCEPTION,
+ ALL_COMPLETED,
+ CancelledError,
+ TimeoutError,
+ InvalidStateError,
+ BrokenExecutor,
+ Future,
+ Executor,
+ wait,
+ as_completed)
+
+__all__ = (
+ 'FIRST_COMPLETED',
+ 'FIRST_EXCEPTION',
+ 'ALL_COMPLETED',
+ 'CancelledError',
+ 'TimeoutError',
+ 'BrokenExecutor',
+ 'Future',
+ 'Executor',
+ 'wait',
+ 'as_completed',
+ 'ProcessPoolExecutor',
+ 'ThreadPoolExecutor',
+)
+
+
+def __dir__():
+ return __all__ + ('__author__', '__doc__')
+
+
+def __getattr__(name):
+ global ProcessPoolExecutor, ThreadPoolExecutor
+
+ if name == 'ProcessPoolExecutor':
+ from .process import ProcessPoolExecutor as pe
+ ProcessPoolExecutor = pe
+ return pe
+
+ if name == 'ThreadPoolExecutor':
+ from .thread import ThreadPoolExecutor as te
+ ThreadPoolExecutor = te
+ return te
+
+ raise AttributeError(f"module {__name__} has no attribute {name}")
diff --git a/dist/lib/concurrent/futures/__pycache__/__init__.cpython-38.opt-1.pyc b/dist/lib/concurrent/futures/__pycache__/__init__.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..1d1acff
Binary files /dev/null and b/dist/lib/concurrent/futures/__pycache__/__init__.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/concurrent/futures/__pycache__/_base.cpython-38.opt-1.pyc b/dist/lib/concurrent/futures/__pycache__/_base.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..a007e39
Binary files /dev/null and b/dist/lib/concurrent/futures/__pycache__/_base.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/concurrent/futures/__pycache__/process.cpython-38.opt-1.pyc b/dist/lib/concurrent/futures/__pycache__/process.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..2b1efad
Binary files /dev/null and b/dist/lib/concurrent/futures/__pycache__/process.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/concurrent/futures/__pycache__/thread.cpython-38.opt-1.pyc b/dist/lib/concurrent/futures/__pycache__/thread.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..a12207a
Binary files /dev/null and b/dist/lib/concurrent/futures/__pycache__/thread.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/concurrent/futures/_base.py b/dist/lib/concurrent/futures/_base.py
new file mode 100644
index 0000000..6001e3b
--- /dev/null
+++ b/dist/lib/concurrent/futures/_base.py
@@ -0,0 +1,643 @@
+# Copyright 2009 Brian Quinlan. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+__author__ = 'Brian Quinlan (brian@sweetapp.com)'
+
+import collections
+import logging
+import threading
+import time
+
+FIRST_COMPLETED = 'FIRST_COMPLETED'
+FIRST_EXCEPTION = 'FIRST_EXCEPTION'
+ALL_COMPLETED = 'ALL_COMPLETED'
+_AS_COMPLETED = '_AS_COMPLETED'
+
+# Possible future states (for internal use by the futures package).
+PENDING = 'PENDING'
+RUNNING = 'RUNNING'
+# The future was cancelled by the user...
+CANCELLED = 'CANCELLED'
+# ...and _Waiter.add_cancelled() was called by a worker.
+CANCELLED_AND_NOTIFIED = 'CANCELLED_AND_NOTIFIED'
+FINISHED = 'FINISHED'
+
+_FUTURE_STATES = [
+ PENDING,
+ RUNNING,
+ CANCELLED,
+ CANCELLED_AND_NOTIFIED,
+ FINISHED
+]
+
+_STATE_TO_DESCRIPTION_MAP = {
+ PENDING: "pending",
+ RUNNING: "running",
+ CANCELLED: "cancelled",
+ CANCELLED_AND_NOTIFIED: "cancelled",
+ FINISHED: "finished"
+}
+
+# Logger for internal use by the futures package.
+LOGGER = logging.getLogger("concurrent.futures")
+
+class Error(Exception):
+ """Base class for all future-related exceptions."""
+ pass
+
+class CancelledError(Error):
+ """The Future was cancelled."""
+ pass
+
+class TimeoutError(Error):
+ """The operation exceeded the given deadline."""
+ pass
+
+class InvalidStateError(Error):
+ """The operation is not allowed in this state."""
+ pass
+
+class _Waiter(object):
+ """Provides the event that wait() and as_completed() block on."""
+ def __init__(self):
+ self.event = threading.Event()
+ self.finished_futures = []
+
+ def add_result(self, future):
+ self.finished_futures.append(future)
+
+ def add_exception(self, future):
+ self.finished_futures.append(future)
+
+ def add_cancelled(self, future):
+ self.finished_futures.append(future)
+
+class _AsCompletedWaiter(_Waiter):
+ """Used by as_completed()."""
+
+ def __init__(self):
+ super(_AsCompletedWaiter, self).__init__()
+ self.lock = threading.Lock()
+
+ def add_result(self, future):
+ with self.lock:
+ super(_AsCompletedWaiter, self).add_result(future)
+ self.event.set()
+
+ def add_exception(self, future):
+ with self.lock:
+ super(_AsCompletedWaiter, self).add_exception(future)
+ self.event.set()
+
+ def add_cancelled(self, future):
+ with self.lock:
+ super(_AsCompletedWaiter, self).add_cancelled(future)
+ self.event.set()
+
+class _FirstCompletedWaiter(_Waiter):
+ """Used by wait(return_when=FIRST_COMPLETED)."""
+
+ def add_result(self, future):
+ super().add_result(future)
+ self.event.set()
+
+ def add_exception(self, future):
+ super().add_exception(future)
+ self.event.set()
+
+ def add_cancelled(self, future):
+ super().add_cancelled(future)
+ self.event.set()
+
+class _AllCompletedWaiter(_Waiter):
+ """Used by wait(return_when=FIRST_EXCEPTION and ALL_COMPLETED)."""
+
+ def __init__(self, num_pending_calls, stop_on_exception):
+ self.num_pending_calls = num_pending_calls
+ self.stop_on_exception = stop_on_exception
+ self.lock = threading.Lock()
+ super().__init__()
+
+ def _decrement_pending_calls(self):
+ with self.lock:
+ self.num_pending_calls -= 1
+ if not self.num_pending_calls:
+ self.event.set()
+
+ def add_result(self, future):
+ super().add_result(future)
+ self._decrement_pending_calls()
+
+ def add_exception(self, future):
+ super().add_exception(future)
+ if self.stop_on_exception:
+ self.event.set()
+ else:
+ self._decrement_pending_calls()
+
+ def add_cancelled(self, future):
+ super().add_cancelled(future)
+ self._decrement_pending_calls()
+
+class _AcquireFutures(object):
+ """A context manager that does an ordered acquire of Future conditions."""
+
+ def __init__(self, futures):
+ self.futures = sorted(futures, key=id)
+
+ def __enter__(self):
+ for future in self.futures:
+ future._condition.acquire()
+
+ def __exit__(self, *args):
+ for future in self.futures:
+ future._condition.release()
+
+def _create_and_install_waiters(fs, return_when):
+ if return_when == _AS_COMPLETED:
+ waiter = _AsCompletedWaiter()
+ elif return_when == FIRST_COMPLETED:
+ waiter = _FirstCompletedWaiter()
+ else:
+ pending_count = sum(
+ f._state not in [CANCELLED_AND_NOTIFIED, FINISHED] for f in fs)
+
+ if return_when == FIRST_EXCEPTION:
+ waiter = _AllCompletedWaiter(pending_count, stop_on_exception=True)
+ elif return_when == ALL_COMPLETED:
+ waiter = _AllCompletedWaiter(pending_count, stop_on_exception=False)
+ else:
+ raise ValueError("Invalid return condition: %r" % return_when)
+
+ for f in fs:
+ f._waiters.append(waiter)
+
+ return waiter
+
+
+def _yield_finished_futures(fs, waiter, ref_collect):
+ """
+ Iterate on the list *fs*, yielding finished futures one by one in
+ reverse order.
+ Before yielding a future, *waiter* is removed from its waiters
+ and the future is removed from each set in the collection of sets
+ *ref_collect*.
+
+ The aim of this function is to avoid keeping stale references after
+ the future is yielded and before the iterator resumes.
+ """
+ while fs:
+ f = fs[-1]
+ for futures_set in ref_collect:
+ futures_set.remove(f)
+ with f._condition:
+ f._waiters.remove(waiter)
+ del f
+ # Careful not to keep a reference to the popped value
+ yield fs.pop()
+
+
+def as_completed(fs, timeout=None):
+ """An iterator over the given futures that yields each as it completes.
+
+ Args:
+ fs: The sequence of Futures (possibly created by different Executors) to
+ iterate over.
+ timeout: The maximum number of seconds to wait. If None, then there
+ is no limit on the wait time.
+
+ Returns:
+ An iterator that yields the given Futures as they complete (finished or
+ cancelled). If any given Futures are duplicated, they will be returned
+ once.
+
+ Raises:
+ TimeoutError: If the entire result iterator could not be generated
+ before the given timeout.
+ """
+ if timeout is not None:
+ end_time = timeout + time.monotonic()
+
+ fs = set(fs)
+ total_futures = len(fs)
+ with _AcquireFutures(fs):
+ finished = set(
+ f for f in fs
+ if f._state in [CANCELLED_AND_NOTIFIED, FINISHED])
+ pending = fs - finished
+ waiter = _create_and_install_waiters(fs, _AS_COMPLETED)
+ finished = list(finished)
+ try:
+ yield from _yield_finished_futures(finished, waiter,
+ ref_collect=(fs,))
+
+ while pending:
+ if timeout is None:
+ wait_timeout = None
+ else:
+ wait_timeout = end_time - time.monotonic()
+ if wait_timeout < 0:
+ raise TimeoutError(
+ '%d (of %d) futures unfinished' % (
+ len(pending), total_futures))
+
+ waiter.event.wait(wait_timeout)
+
+ with waiter.lock:
+ finished = waiter.finished_futures
+ waiter.finished_futures = []
+ waiter.event.clear()
+
+ # reverse to keep finishing order
+ finished.reverse()
+ yield from _yield_finished_futures(finished, waiter,
+ ref_collect=(fs, pending))
+
+ finally:
+ # Remove waiter from unfinished futures
+ for f in fs:
+ with f._condition:
+ f._waiters.remove(waiter)
+
+DoneAndNotDoneFutures = collections.namedtuple(
+ 'DoneAndNotDoneFutures', 'done not_done')
+def wait(fs, timeout=None, return_when=ALL_COMPLETED):
+ """Wait for the futures in the given sequence to complete.
+
+ Args:
+ fs: The sequence of Futures (possibly created by different Executors) to
+ wait upon.
+ timeout: The maximum number of seconds to wait. If None, then there
+ is no limit on the wait time.
+ return_when: Indicates when this function should return. The options
+ are:
+
+ FIRST_COMPLETED - Return when any future finishes or is
+ cancelled.
+ FIRST_EXCEPTION - Return when any future finishes by raising an
+ exception. If no future raises an exception
+ then it is equivalent to ALL_COMPLETED.
+ ALL_COMPLETED - Return when all futures finish or are cancelled.
+
+ Returns:
+ A named 2-tuple of sets. The first set, named 'done', contains the
+ futures that completed (is finished or cancelled) before the wait
+ completed. The second set, named 'not_done', contains uncompleted
+ futures.
+ """
+ with _AcquireFutures(fs):
+ done = set(f for f in fs
+ if f._state in [CANCELLED_AND_NOTIFIED, FINISHED])
+ not_done = set(fs) - done
+
+ if (return_when == FIRST_COMPLETED) and done:
+ return DoneAndNotDoneFutures(done, not_done)
+ elif (return_when == FIRST_EXCEPTION) and done:
+ if any(f for f in done
+ if not f.cancelled() and f.exception() is not None):
+ return DoneAndNotDoneFutures(done, not_done)
+
+ if len(done) == len(fs):
+ return DoneAndNotDoneFutures(done, not_done)
+
+ waiter = _create_and_install_waiters(fs, return_when)
+
+ waiter.event.wait(timeout)
+ for f in fs:
+ with f._condition:
+ f._waiters.remove(waiter)
+
+ done.update(waiter.finished_futures)
+ return DoneAndNotDoneFutures(done, set(fs) - done)
+
+class Future(object):
+ """Represents the result of an asynchronous computation."""
+
+ def __init__(self):
+ """Initializes the future. Should not be called by clients."""
+ self._condition = threading.Condition()
+ self._state = PENDING
+ self._result = None
+ self._exception = None
+ self._waiters = []
+ self._done_callbacks = []
+
+ def _invoke_callbacks(self):
+ for callback in self._done_callbacks:
+ try:
+ callback(self)
+ except Exception:
+ LOGGER.exception('exception calling callback for %r', self)
+
+ def __repr__(self):
+ with self._condition:
+ if self._state == FINISHED:
+ if self._exception:
+ return '<%s at %#x state=%s raised %s>' % (
+ self.__class__.__name__,
+ id(self),
+ _STATE_TO_DESCRIPTION_MAP[self._state],
+ self._exception.__class__.__name__)
+ else:
+ return '<%s at %#x state=%s returned %s>' % (
+ self.__class__.__name__,
+ id(self),
+ _STATE_TO_DESCRIPTION_MAP[self._state],
+ self._result.__class__.__name__)
+ return '<%s at %#x state=%s>' % (
+ self.__class__.__name__,
+ id(self),
+ _STATE_TO_DESCRIPTION_MAP[self._state])
+
+ def cancel(self):
+ """Cancel the future if possible.
+
+ Returns True if the future was cancelled, False otherwise. A future
+ cannot be cancelled if it is running or has already completed.
+ """
+ with self._condition:
+ if self._state in [RUNNING, FINISHED]:
+ return False
+
+ if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
+ return True
+
+ self._state = CANCELLED
+ self._condition.notify_all()
+
+ self._invoke_callbacks()
+ return True
+
+ def cancelled(self):
+ """Return True if the future was cancelled."""
+ with self._condition:
+ return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]
+
+ def running(self):
+ """Return True if the future is currently executing."""
+ with self._condition:
+ return self._state == RUNNING
+
+ def done(self):
+ """Return True of the future was cancelled or finished executing."""
+ with self._condition:
+ return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED]
+
+ def __get_result(self):
+ if self._exception:
+ raise self._exception
+ else:
+ return self._result
+
+ def add_done_callback(self, fn):
+ """Attaches a callable that will be called when the future finishes.
+
+ Args:
+ fn: A callable that will be called with this future as its only
+ argument when the future completes or is cancelled. The callable
+ will always be called by a thread in the same process in which
+ it was added. If the future has already completed or been
+ cancelled then the callable will be called immediately. These
+ callables are called in the order that they were added.
+ """
+ with self._condition:
+ if self._state not in [CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED]:
+ self._done_callbacks.append(fn)
+ return
+ try:
+ fn(self)
+ except Exception:
+ LOGGER.exception('exception calling callback for %r', self)
+
+ def result(self, timeout=None):
+ """Return the result of the call that the future represents.
+
+ Args:
+ timeout: The number of seconds to wait for the result if the future
+ isn't done. If None, then there is no limit on the wait time.
+
+ Returns:
+ The result of the call that the future represents.
+
+ Raises:
+ CancelledError: If the future was cancelled.
+ TimeoutError: If the future didn't finish executing before the given
+ timeout.
+ Exception: If the call raised then that exception will be raised.
+ """
+ with self._condition:
+ if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
+ raise CancelledError()
+ elif self._state == FINISHED:
+ return self.__get_result()
+
+ self._condition.wait(timeout)
+
+ if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
+ raise CancelledError()
+ elif self._state == FINISHED:
+ return self.__get_result()
+ else:
+ raise TimeoutError()
+
+ def exception(self, timeout=None):
+ """Return the exception raised by the call that the future represents.
+
+ Args:
+ timeout: The number of seconds to wait for the exception if the
+ future isn't done. If None, then there is no limit on the wait
+ time.
+
+ Returns:
+ The exception raised by the call that the future represents or None
+ if the call completed without raising.
+
+ Raises:
+ CancelledError: If the future was cancelled.
+ TimeoutError: If the future didn't finish executing before the given
+ timeout.
+ """
+
+ with self._condition:
+ if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
+ raise CancelledError()
+ elif self._state == FINISHED:
+ return self._exception
+
+ self._condition.wait(timeout)
+
+ if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
+ raise CancelledError()
+ elif self._state == FINISHED:
+ return self._exception
+ else:
+ raise TimeoutError()
+
+ # The following methods should only be used by Executors and in tests.
+ def set_running_or_notify_cancel(self):
+ """Mark the future as running or process any cancel notifications.
+
+ Should only be used by Executor implementations and unit tests.
+
+ If the future has been cancelled (cancel() was called and returned
+ True) then any threads waiting on the future completing (though calls
+ to as_completed() or wait()) are notified and False is returned.
+
+ If the future was not cancelled then it is put in the running state
+ (future calls to running() will return True) and True is returned.
+
+ This method should be called by Executor implementations before
+ executing the work associated with this future. If this method returns
+ False then the work should not be executed.
+
+ Returns:
+ False if the Future was cancelled, True otherwise.
+
+ Raises:
+ RuntimeError: if this method was already called or if set_result()
+ or set_exception() was called.
+ """
+ with self._condition:
+ if self._state == CANCELLED:
+ self._state = CANCELLED_AND_NOTIFIED
+ for waiter in self._waiters:
+ waiter.add_cancelled(self)
+ # self._condition.notify_all() is not necessary because
+ # self.cancel() triggers a notification.
+ return False
+ elif self._state == PENDING:
+ self._state = RUNNING
+ return True
+ else:
+ LOGGER.critical('Future %s in unexpected state: %s',
+ id(self),
+ self._state)
+ raise RuntimeError('Future in unexpected state')
+
+ def set_result(self, result):
+ """Sets the return value of work associated with the future.
+
+ Should only be used by Executor implementations and unit tests.
+ """
+ with self._condition:
+ if self._state in {CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED}:
+ raise InvalidStateError('{}: {!r}'.format(self._state, self))
+ self._result = result
+ self._state = FINISHED
+ for waiter in self._waiters:
+ waiter.add_result(self)
+ self._condition.notify_all()
+ self._invoke_callbacks()
+
+ def set_exception(self, exception):
+ """Sets the result of the future as being the given exception.
+
+ Should only be used by Executor implementations and unit tests.
+ """
+ with self._condition:
+ if self._state in {CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED}:
+ raise InvalidStateError('{}: {!r}'.format(self._state, self))
+ self._exception = exception
+ self._state = FINISHED
+ for waiter in self._waiters:
+ waiter.add_exception(self)
+ self._condition.notify_all()
+ self._invoke_callbacks()
+
+class Executor(object):
+ """This is an abstract base class for concrete asynchronous executors."""
+
+ def submit(*args, **kwargs):
+ """Submits a callable to be executed with the given arguments.
+
+ Schedules the callable to be executed as fn(*args, **kwargs) and returns
+ a Future instance representing the execution of the callable.
+
+ Returns:
+ A Future representing the given call.
+ """
+ if len(args) >= 2:
+ pass
+ elif not args:
+ raise TypeError("descriptor 'submit' of 'Executor' object "
+ "needs an argument")
+ elif 'fn' in kwargs:
+ import warnings
+ warnings.warn("Passing 'fn' as keyword argument is deprecated",
+ DeprecationWarning, stacklevel=2)
+ else:
+ raise TypeError('submit expected at least 1 positional argument, '
+ 'got %d' % (len(args)-1))
+
+ raise NotImplementedError()
+ submit.__text_signature__ = '($self, fn, /, *args, **kwargs)'
+
+ def map(self, fn, *iterables, timeout=None, chunksize=1):
+ """Returns an iterator equivalent to map(fn, iter).
+
+ Args:
+ fn: A callable that will take as many arguments as there are
+ passed iterables.
+ timeout: The maximum number of seconds to wait. If None, then there
+ is no limit on the wait time.
+ chunksize: The size of the chunks the iterable will be broken into
+ before being passed to a child process. This argument is only
+ used by ProcessPoolExecutor; it is ignored by
+ ThreadPoolExecutor.
+
+ Returns:
+ An iterator equivalent to: map(func, *iterables) but the calls may
+ be evaluated out-of-order.
+
+ Raises:
+ TimeoutError: If the entire result iterator could not be generated
+ before the given timeout.
+ Exception: If fn(*args) raises for any values.
+ """
+ if timeout is not None:
+ end_time = timeout + time.monotonic()
+
+ fs = [self.submit(fn, *args) for args in zip(*iterables)]
+
+ # Yield must be hidden in closure so that the futures are submitted
+ # before the first iterator value is required.
+ def result_iterator():
+ try:
+ # reverse to keep finishing order
+ fs.reverse()
+ while fs:
+ # Careful not to keep a reference to the popped future
+ if timeout is None:
+ yield fs.pop().result()
+ else:
+ yield fs.pop().result(end_time - time.monotonic())
+ finally:
+ for future in fs:
+ future.cancel()
+ return result_iterator()
+
+ def shutdown(self, wait=True):
+ """Clean-up the resources associated with the Executor.
+
+ It is safe to call this method several times. Otherwise, no other
+ methods can be called after this one.
+
+ Args:
+ wait: If True then shutdown will not return until all running
+ futures have finished executing and the resources used by the
+ executor have been reclaimed.
+ """
+ pass
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ self.shutdown(wait=True)
+ return False
+
+
+class BrokenExecutor(RuntimeError):
+ """
+ Raised when a executor has become non-functional after a severe failure.
+ """
diff --git a/dist/lib/concurrent/futures/process.py b/dist/lib/concurrent/futures/process.py
new file mode 100644
index 0000000..2b2b78e
--- /dev/null
+++ b/dist/lib/concurrent/futures/process.py
@@ -0,0 +1,704 @@
+# Copyright 2009 Brian Quinlan. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Implements ProcessPoolExecutor.
+
+The following diagram and text describe the data-flow through the system:
+
+|======================= In-process =====================|== Out-of-process ==|
+
++----------+ +----------+ +--------+ +-----------+ +---------+
+| | => | Work Ids | | | | Call Q | | Process |
+| | +----------+ | | +-----------+ | Pool |
+| | | ... | | | | ... | +---------+
+| | | 6 | => | | => | 5, call() | => | |
+| | | 7 | | | | ... | | |
+| Process | | ... | | Local | +-----------+ | Process |
+| Pool | +----------+ | Worker | | #1..n |
+| Executor | | Thread | | |
+| | +----------- + | | +-----------+ | |
+| | <=> | Work Items | <=> | | <= | Result Q | <= | |
+| | +------------+ | | +-----------+ | |
+| | | 6: call() | | | | ... | | |
+| | | future | | | | 4, result | | |
+| | | ... | | | | 3, except | | |
++----------+ +------------+ +--------+ +-----------+ +---------+
+
+Executor.submit() called:
+- creates a uniquely numbered _WorkItem and adds it to the "Work Items" dict
+- adds the id of the _WorkItem to the "Work Ids" queue
+
+Local worker thread:
+- reads work ids from the "Work Ids" queue and looks up the corresponding
+ WorkItem from the "Work Items" dict: if the work item has been cancelled then
+ it is simply removed from the dict, otherwise it is repackaged as a
+ _CallItem and put in the "Call Q". New _CallItems are put in the "Call Q"
+ until "Call Q" is full. NOTE: the size of the "Call Q" is kept small because
+ calls placed in the "Call Q" can no longer be cancelled with Future.cancel().
+- reads _ResultItems from "Result Q", updates the future stored in the
+ "Work Items" dict and deletes the dict entry
+
+Process #1..n:
+- reads _CallItems from "Call Q", executes the calls, and puts the resulting
+ _ResultItems in "Result Q"
+"""
+
+__author__ = 'Brian Quinlan (brian@sweetapp.com)'
+
+import atexit
+import os
+from concurrent.futures import _base
+import queue
+from queue import Full
+import multiprocessing as mp
+import multiprocessing.connection
+from multiprocessing.queues import Queue
+import threading
+import weakref
+from functools import partial
+import itertools
+import sys
+import traceback
+
+# Workers are created as daemon threads and processes. This is done to allow the
+# interpreter to exit when there are still idle processes in a
+# ProcessPoolExecutor's process pool (i.e. shutdown() was not called). However,
+# allowing workers to die with the interpreter has two undesirable properties:
+# - The workers would still be running during interpreter shutdown,
+# meaning that they would fail in unpredictable ways.
+# - The workers could be killed while evaluating a work item, which could
+# be bad if the callable being evaluated has external side-effects e.g.
+# writing to a file.
+#
+# To work around this problem, an exit handler is installed which tells the
+# workers to exit when their work queues are empty and then waits until the
+# threads/processes finish.
+
+_threads_wakeups = weakref.WeakKeyDictionary()
+_global_shutdown = False
+
+
+class _ThreadWakeup:
+ def __init__(self):
+ self._reader, self._writer = mp.Pipe(duplex=False)
+
+ def close(self):
+ self._writer.close()
+ self._reader.close()
+
+ def wakeup(self):
+ self._writer.send_bytes(b"")
+
+ def clear(self):
+ while self._reader.poll():
+ self._reader.recv_bytes()
+
+
+def _python_exit():
+ global _global_shutdown
+ _global_shutdown = True
+ items = list(_threads_wakeups.items())
+ for _, thread_wakeup in items:
+ thread_wakeup.wakeup()
+ for t, _ in items:
+ t.join()
+
+# Controls how many more calls than processes will be queued in the call queue.
+# A smaller number will mean that processes spend more time idle waiting for
+# work while a larger number will make Future.cancel() succeed less frequently
+# (Futures in the call queue cannot be cancelled).
+EXTRA_QUEUED_CALLS = 1
+
+
+# On Windows, WaitForMultipleObjects is used to wait for processes to finish.
+# It can wait on, at most, 63 objects. There is an overhead of two objects:
+# - the result queue reader
+# - the thread wakeup reader
+_MAX_WINDOWS_WORKERS = 63 - 2
+
+# Hack to embed stringification of remote traceback in local traceback
+
+class _RemoteTraceback(Exception):
+ def __init__(self, tb):
+ self.tb = tb
+ def __str__(self):
+ return self.tb
+
+class _ExceptionWithTraceback:
+ def __init__(self, exc, tb):
+ tb = traceback.format_exception(type(exc), exc, tb)
+ tb = ''.join(tb)
+ self.exc = exc
+ self.tb = '\n"""\n%s"""' % tb
+ def __reduce__(self):
+ return _rebuild_exc, (self.exc, self.tb)
+
+def _rebuild_exc(exc, tb):
+ exc.__cause__ = _RemoteTraceback(tb)
+ return exc
+
+class _WorkItem(object):
+ def __init__(self, future, fn, args, kwargs):
+ self.future = future
+ self.fn = fn
+ self.args = args
+ self.kwargs = kwargs
+
+class _ResultItem(object):
+ def __init__(self, work_id, exception=None, result=None):
+ self.work_id = work_id
+ self.exception = exception
+ self.result = result
+
+class _CallItem(object):
+ def __init__(self, work_id, fn, args, kwargs):
+ self.work_id = work_id
+ self.fn = fn
+ self.args = args
+ self.kwargs = kwargs
+
+
+class _SafeQueue(Queue):
+ """Safe Queue set exception to the future object linked to a job"""
+ def __init__(self, max_size=0, *, ctx, pending_work_items):
+ self.pending_work_items = pending_work_items
+ super().__init__(max_size, ctx=ctx)
+
+ def _on_queue_feeder_error(self, e, obj):
+ if isinstance(obj, _CallItem):
+ tb = traceback.format_exception(type(e), e, e.__traceback__)
+ e.__cause__ = _RemoteTraceback('\n"""\n{}"""'.format(''.join(tb)))
+ work_item = self.pending_work_items.pop(obj.work_id, None)
+ # work_item can be None if another process terminated. In this case,
+ # the queue_manager_thread fails all work_items with BrokenProcessPool
+ if work_item is not None:
+ work_item.future.set_exception(e)
+ else:
+ super()._on_queue_feeder_error(e, obj)
+
+
+def _get_chunks(*iterables, chunksize):
+ """ Iterates over zip()ed iterables in chunks. """
+ it = zip(*iterables)
+ while True:
+ chunk = tuple(itertools.islice(it, chunksize))
+ if not chunk:
+ return
+ yield chunk
+
+def _process_chunk(fn, chunk):
+ """ Processes a chunk of an iterable passed to map.
+
+ Runs the function passed to map() on a chunk of the
+ iterable passed to map.
+
+ This function is run in a separate process.
+
+ """
+ return [fn(*args) for args in chunk]
+
+
+def _sendback_result(result_queue, work_id, result=None, exception=None):
+ """Safely send back the given result or exception"""
+ try:
+ result_queue.put(_ResultItem(work_id, result=result,
+ exception=exception))
+ except BaseException as e:
+ exc = _ExceptionWithTraceback(e, e.__traceback__)
+ result_queue.put(_ResultItem(work_id, exception=exc))
+
+
+def _process_worker(call_queue, result_queue, initializer, initargs):
+ """Evaluates calls from call_queue and places the results in result_queue.
+
+ This worker is run in a separate process.
+
+ Args:
+ call_queue: A ctx.Queue of _CallItems that will be read and
+ evaluated by the worker.
+ result_queue: A ctx.Queue of _ResultItems that will written
+ to by the worker.
+ initializer: A callable initializer, or None
+ initargs: A tuple of args for the initializer
+ """
+ if initializer is not None:
+ try:
+ initializer(*initargs)
+ except BaseException:
+ _base.LOGGER.critical('Exception in initializer:', exc_info=True)
+ # The parent will notice that the process stopped and
+ # mark the pool broken
+ return
+ while True:
+ call_item = call_queue.get(block=True)
+ if call_item is None:
+ # Wake up queue management thread
+ result_queue.put(os.getpid())
+ return
+ try:
+ r = call_item.fn(*call_item.args, **call_item.kwargs)
+ except BaseException as e:
+ exc = _ExceptionWithTraceback(e, e.__traceback__)
+ _sendback_result(result_queue, call_item.work_id, exception=exc)
+ else:
+ _sendback_result(result_queue, call_item.work_id, result=r)
+ del r
+
+ # Liberate the resource as soon as possible, to avoid holding onto
+ # open files or shared memory that is not needed anymore
+ del call_item
+
+
+def _add_call_item_to_queue(pending_work_items,
+ work_ids,
+ call_queue):
+ """Fills call_queue with _WorkItems from pending_work_items.
+
+ This function never blocks.
+
+ Args:
+ pending_work_items: A dict mapping work ids to _WorkItems e.g.
+ {5: <_WorkItem...>, 6: <_WorkItem...>, ...}
+ work_ids: A queue.Queue of work ids e.g. Queue([5, 6, ...]). Work ids
+ are consumed and the corresponding _WorkItems from
+ pending_work_items are transformed into _CallItems and put in
+ call_queue.
+ call_queue: A multiprocessing.Queue that will be filled with _CallItems
+ derived from _WorkItems.
+ """
+ while True:
+ if call_queue.full():
+ return
+ try:
+ work_id = work_ids.get(block=False)
+ except queue.Empty:
+ return
+ else:
+ work_item = pending_work_items[work_id]
+
+ if work_item.future.set_running_or_notify_cancel():
+ call_queue.put(_CallItem(work_id,
+ work_item.fn,
+ work_item.args,
+ work_item.kwargs),
+ block=True)
+ else:
+ del pending_work_items[work_id]
+ continue
+
+
+def _queue_management_worker(executor_reference,
+ processes,
+ pending_work_items,
+ work_ids_queue,
+ call_queue,
+ result_queue,
+ thread_wakeup):
+ """Manages the communication between this process and the worker processes.
+
+ This function is run in a local thread.
+
+ Args:
+ executor_reference: A weakref.ref to the ProcessPoolExecutor that owns
+ this thread. Used to determine if the ProcessPoolExecutor has been
+ garbage collected and that this function can exit.
+ process: A list of the ctx.Process instances used as
+ workers.
+ pending_work_items: A dict mapping work ids to _WorkItems e.g.
+ {5: <_WorkItem...>, 6: <_WorkItem...>, ...}
+ work_ids_queue: A queue.Queue of work ids e.g. Queue([5, 6, ...]).
+ call_queue: A ctx.Queue that will be filled with _CallItems
+ derived from _WorkItems for processing by the process workers.
+ result_queue: A ctx.SimpleQueue of _ResultItems generated by the
+ process workers.
+ thread_wakeup: A _ThreadWakeup to allow waking up the
+ queue_manager_thread from the main Thread and avoid deadlocks
+ caused by permanently locked queues.
+ """
+ executor = None
+
+ def shutting_down():
+ return (_global_shutdown or executor is None
+ or executor._shutdown_thread)
+
+ def shutdown_worker():
+ # This is an upper bound on the number of children alive.
+ n_children_alive = sum(p.is_alive() for p in processes.values())
+ n_children_to_stop = n_children_alive
+ n_sentinels_sent = 0
+ # Send the right number of sentinels, to make sure all children are
+ # properly terminated.
+ while n_sentinels_sent < n_children_to_stop and n_children_alive > 0:
+ for i in range(n_children_to_stop - n_sentinels_sent):
+ try:
+ call_queue.put_nowait(None)
+ n_sentinels_sent += 1
+ except Full:
+ break
+ n_children_alive = sum(p.is_alive() for p in processes.values())
+
+ # Release the queue's resources as soon as possible.
+ call_queue.close()
+ # If .join() is not called on the created processes then
+ # some ctx.Queue methods may deadlock on Mac OS X.
+ for p in processes.values():
+ p.join()
+
+ result_reader = result_queue._reader
+ wakeup_reader = thread_wakeup._reader
+ readers = [result_reader, wakeup_reader]
+
+ while True:
+ _add_call_item_to_queue(pending_work_items,
+ work_ids_queue,
+ call_queue)
+
+ # Wait for a result to be ready in the result_queue while checking
+ # that all worker processes are still running, or for a wake up
+ # signal send. The wake up signals come either from new tasks being
+ # submitted, from the executor being shutdown/gc-ed, or from the
+ # shutdown of the python interpreter.
+ worker_sentinels = [p.sentinel for p in processes.values()]
+ ready = mp.connection.wait(readers + worker_sentinels)
+
+ cause = None
+ is_broken = True
+ if result_reader in ready:
+ try:
+ result_item = result_reader.recv()
+ is_broken = False
+ except BaseException as e:
+ cause = traceback.format_exception(type(e), e, e.__traceback__)
+
+ elif wakeup_reader in ready:
+ is_broken = False
+ result_item = None
+ thread_wakeup.clear()
+ if is_broken:
+ # Mark the process pool broken so that submits fail right now.
+ executor = executor_reference()
+ if executor is not None:
+ executor._broken = ('A child process terminated '
+ 'abruptly, the process pool is not '
+ 'usable anymore')
+ executor._shutdown_thread = True
+ executor = None
+ bpe = BrokenProcessPool("A process in the process pool was "
+ "terminated abruptly while the future was "
+ "running or pending.")
+ if cause is not None:
+ bpe.__cause__ = _RemoteTraceback(
+ f"\n'''\n{''.join(cause)}'''")
+ # All futures in flight must be marked failed
+ for work_id, work_item in pending_work_items.items():
+ work_item.future.set_exception(bpe)
+ # Delete references to object. See issue16284
+ del work_item
+ pending_work_items.clear()
+ # Terminate remaining workers forcibly: the queues or their
+ # locks may be in a dirty state and block forever.
+ for p in processes.values():
+ p.terminate()
+ shutdown_worker()
+ return
+ if isinstance(result_item, int):
+ # Clean shutdown of a worker using its PID
+ # (avoids marking the executor broken)
+ assert shutting_down()
+ p = processes.pop(result_item)
+ p.join()
+ if not processes:
+ shutdown_worker()
+ return
+ elif result_item is not None:
+ work_item = pending_work_items.pop(result_item.work_id, None)
+ # work_item can be None if another process terminated (see above)
+ if work_item is not None:
+ if result_item.exception:
+ work_item.future.set_exception(result_item.exception)
+ else:
+ work_item.future.set_result(result_item.result)
+ # Delete references to object. See issue16284
+ del work_item
+ # Delete reference to result_item
+ del result_item
+
+ # Check whether we should start shutting down.
+ executor = executor_reference()
+ # No more work items can be added if:
+ # - The interpreter is shutting down OR
+ # - The executor that owns this worker has been collected OR
+ # - The executor that owns this worker has been shutdown.
+ if shutting_down():
+ try:
+ # Flag the executor as shutting down as early as possible if it
+ # is not gc-ed yet.
+ if executor is not None:
+ executor._shutdown_thread = True
+ # Since no new work items can be added, it is safe to shutdown
+ # this thread if there are no pending work items.
+ if not pending_work_items:
+ shutdown_worker()
+ return
+ except Full:
+ # This is not a problem: we will eventually be woken up (in
+ # result_queue.get()) and be able to send a sentinel again.
+ pass
+ executor = None
+
+
+_system_limits_checked = False
+_system_limited = None
+
+
+def _check_system_limits():
+ global _system_limits_checked, _system_limited
+ if _system_limits_checked:
+ if _system_limited:
+ raise NotImplementedError(_system_limited)
+ _system_limits_checked = True
+ try:
+ nsems_max = os.sysconf("SC_SEM_NSEMS_MAX")
+ except (AttributeError, ValueError):
+ # sysconf not available or setting not available
+ return
+ if nsems_max == -1:
+ # indetermined limit, assume that limit is determined
+ # by available memory only
+ return
+ if nsems_max >= 256:
+ # minimum number of semaphores available
+ # according to POSIX
+ return
+ _system_limited = ("system provides too few semaphores (%d"
+ " available, 256 necessary)" % nsems_max)
+ raise NotImplementedError(_system_limited)
+
+
+def _chain_from_iterable_of_lists(iterable):
+ """
+ Specialized implementation of itertools.chain.from_iterable.
+ Each item in *iterable* should be a list. This function is
+ careful not to keep references to yielded objects.
+ """
+ for element in iterable:
+ element.reverse()
+ while element:
+ yield element.pop()
+
+
+class BrokenProcessPool(_base.BrokenExecutor):
+ """
+ Raised when a process in a ProcessPoolExecutor terminated abruptly
+ while a future was in the running state.
+ """
+
+
+class ProcessPoolExecutor(_base.Executor):
+ def __init__(self, max_workers=None, mp_context=None,
+ initializer=None, initargs=()):
+ """Initializes a new ProcessPoolExecutor instance.
+
+ Args:
+ max_workers: The maximum number of processes that can be used to
+ execute the given calls. If None or not given then as many
+ worker processes will be created as the machine has processors.
+ mp_context: A multiprocessing context to launch the workers. This
+ object should provide SimpleQueue, Queue and Process.
+ initializer: A callable used to initialize worker processes.
+ initargs: A tuple of arguments to pass to the initializer.
+ """
+ _check_system_limits()
+
+ if max_workers is None:
+ self._max_workers = os.cpu_count() or 1
+ if sys.platform == 'win32':
+ self._max_workers = min(_MAX_WINDOWS_WORKERS,
+ self._max_workers)
+ else:
+ if max_workers <= 0:
+ raise ValueError("max_workers must be greater than 0")
+ elif (sys.platform == 'win32' and
+ max_workers > _MAX_WINDOWS_WORKERS):
+ raise ValueError(
+ f"max_workers must be <= {_MAX_WINDOWS_WORKERS}")
+
+ self._max_workers = max_workers
+
+ if mp_context is None:
+ mp_context = mp.get_context()
+ self._mp_context = mp_context
+
+ if initializer is not None and not callable(initializer):
+ raise TypeError("initializer must be a callable")
+ self._initializer = initializer
+ self._initargs = initargs
+
+ # Management thread
+ self._queue_management_thread = None
+
+ # Map of pids to processes
+ self._processes = {}
+
+ # Shutdown is a two-step process.
+ self._shutdown_thread = False
+ self._shutdown_lock = threading.Lock()
+ self._broken = False
+ self._queue_count = 0
+ self._pending_work_items = {}
+
+ # Create communication channels for the executor
+ # Make the call queue slightly larger than the number of processes to
+ # prevent the worker processes from idling. But don't make it too big
+ # because futures in the call queue cannot be cancelled.
+ queue_size = self._max_workers + EXTRA_QUEUED_CALLS
+ self._call_queue = _SafeQueue(
+ max_size=queue_size, ctx=self._mp_context,
+ pending_work_items=self._pending_work_items)
+ # Killed worker processes can produce spurious "broken pipe"
+ # tracebacks in the queue's own worker thread. But we detect killed
+ # processes anyway, so silence the tracebacks.
+ self._call_queue._ignore_epipe = True
+ self._result_queue = mp_context.SimpleQueue()
+ self._work_ids = queue.Queue()
+
+ # _ThreadWakeup is a communication channel used to interrupt the wait
+ # of the main loop of queue_manager_thread from another thread (e.g.
+ # when calling executor.submit or executor.shutdown). We do not use the
+ # _result_queue to send the wakeup signal to the queue_manager_thread
+ # as it could result in a deadlock if a worker process dies with the
+ # _result_queue write lock still acquired.
+ self._queue_management_thread_wakeup = _ThreadWakeup()
+
+ def _start_queue_management_thread(self):
+ if self._queue_management_thread is None:
+ # When the executor gets garbarge collected, the weakref callback
+ # will wake up the queue management thread so that it can terminate
+ # if there is no pending work item.
+ def weakref_cb(_,
+ thread_wakeup=self._queue_management_thread_wakeup):
+ mp.util.debug('Executor collected: triggering callback for'
+ ' QueueManager wakeup')
+ thread_wakeup.wakeup()
+ # Start the processes so that their sentinels are known.
+ self._adjust_process_count()
+ self._queue_management_thread = threading.Thread(
+ target=_queue_management_worker,
+ args=(weakref.ref(self, weakref_cb),
+ self._processes,
+ self._pending_work_items,
+ self._work_ids,
+ self._call_queue,
+ self._result_queue,
+ self._queue_management_thread_wakeup),
+ name="QueueManagerThread")
+ self._queue_management_thread.daemon = True
+ self._queue_management_thread.start()
+ _threads_wakeups[self._queue_management_thread] = \
+ self._queue_management_thread_wakeup
+
+ def _adjust_process_count(self):
+ for _ in range(len(self._processes), self._max_workers):
+ p = self._mp_context.Process(
+ target=_process_worker,
+ args=(self._call_queue,
+ self._result_queue,
+ self._initializer,
+ self._initargs))
+ p.start()
+ self._processes[p.pid] = p
+
+ def submit(*args, **kwargs):
+ if len(args) >= 2:
+ self, fn, *args = args
+ elif not args:
+ raise TypeError("descriptor 'submit' of 'ProcessPoolExecutor' object "
+ "needs an argument")
+ elif 'fn' in kwargs:
+ fn = kwargs.pop('fn')
+ self, *args = args
+ import warnings
+ warnings.warn("Passing 'fn' as keyword argument is deprecated",
+ DeprecationWarning, stacklevel=2)
+ else:
+ raise TypeError('submit expected at least 1 positional argument, '
+ 'got %d' % (len(args)-1))
+
+ with self._shutdown_lock:
+ if self._broken:
+ raise BrokenProcessPool(self._broken)
+ if self._shutdown_thread:
+ raise RuntimeError('cannot schedule new futures after shutdown')
+ if _global_shutdown:
+ raise RuntimeError('cannot schedule new futures after '
+ 'interpreter shutdown')
+
+ f = _base.Future()
+ w = _WorkItem(f, fn, args, kwargs)
+
+ self._pending_work_items[self._queue_count] = w
+ self._work_ids.put(self._queue_count)
+ self._queue_count += 1
+ # Wake up queue management thread
+ self._queue_management_thread_wakeup.wakeup()
+
+ self._start_queue_management_thread()
+ return f
+ submit.__text_signature__ = _base.Executor.submit.__text_signature__
+ submit.__doc__ = _base.Executor.submit.__doc__
+
+ def map(self, fn, *iterables, timeout=None, chunksize=1):
+ """Returns an iterator equivalent to map(fn, iter).
+
+ Args:
+ fn: A callable that will take as many arguments as there are
+ passed iterables.
+ timeout: The maximum number of seconds to wait. If None, then there
+ is no limit on the wait time.
+ chunksize: If greater than one, the iterables will be chopped into
+ chunks of size chunksize and submitted to the process pool.
+ If set to one, the items in the list will be sent one at a time.
+
+ Returns:
+ An iterator equivalent to: map(func, *iterables) but the calls may
+ be evaluated out-of-order.
+
+ Raises:
+ TimeoutError: If the entire result iterator could not be generated
+ before the given timeout.
+ Exception: If fn(*args) raises for any values.
+ """
+ if chunksize < 1:
+ raise ValueError("chunksize must be >= 1.")
+
+ results = super().map(partial(_process_chunk, fn),
+ _get_chunks(*iterables, chunksize=chunksize),
+ timeout=timeout)
+ return _chain_from_iterable_of_lists(results)
+
+ def shutdown(self, wait=True):
+ with self._shutdown_lock:
+ self._shutdown_thread = True
+ if self._queue_management_thread:
+ # Wake up queue management thread
+ self._queue_management_thread_wakeup.wakeup()
+ if wait:
+ self._queue_management_thread.join()
+ # To reduce the risk of opening too many files, remove references to
+ # objects that use file descriptors.
+ self._queue_management_thread = None
+ if self._call_queue is not None:
+ self._call_queue.close()
+ if wait:
+ self._call_queue.join_thread()
+ self._call_queue = None
+ self._result_queue = None
+ self._processes = None
+
+ if self._queue_management_thread_wakeup:
+ self._queue_management_thread_wakeup.close()
+ self._queue_management_thread_wakeup = None
+
+ shutdown.__doc__ = _base.Executor.shutdown.__doc__
+
+atexit.register(_python_exit)
diff --git a/dist/lib/concurrent/futures/thread.py b/dist/lib/concurrent/futures/thread.py
new file mode 100644
index 0000000..9e669b2
--- /dev/null
+++ b/dist/lib/concurrent/futures/thread.py
@@ -0,0 +1,237 @@
+# Copyright 2009 Brian Quinlan. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Implements ThreadPoolExecutor."""
+
+__author__ = 'Brian Quinlan (brian@sweetapp.com)'
+
+import atexit
+from concurrent.futures import _base
+import itertools
+import queue
+import threading
+import weakref
+import os
+
+# Workers are created as daemon threads. This is done to allow the interpreter
+# to exit when there are still idle threads in a ThreadPoolExecutor's thread
+# pool (i.e. shutdown() was not called). However, allowing workers to die with
+# the interpreter has two undesirable properties:
+# - The workers would still be running during interpreter shutdown,
+# meaning that they would fail in unpredictable ways.
+# - The workers could be killed while evaluating a work item, which could
+# be bad if the callable being evaluated has external side-effects e.g.
+# writing to a file.
+#
+# To work around this problem, an exit handler is installed which tells the
+# workers to exit when their work queues are empty and then waits until the
+# threads finish.
+
+_threads_queues = weakref.WeakKeyDictionary()
+_shutdown = False
+
+def _python_exit():
+ global _shutdown
+ _shutdown = True
+ items = list(_threads_queues.items())
+ for t, q in items:
+ q.put(None)
+ for t, q in items:
+ t.join()
+
+atexit.register(_python_exit)
+
+
+class _WorkItem(object):
+ def __init__(self, future, fn, args, kwargs):
+ self.future = future
+ self.fn = fn
+ self.args = args
+ self.kwargs = kwargs
+
+ def run(self):
+ if not self.future.set_running_or_notify_cancel():
+ return
+
+ try:
+ result = self.fn(*self.args, **self.kwargs)
+ except BaseException as exc:
+ self.future.set_exception(exc)
+ # Break a reference cycle with the exception 'exc'
+ self = None
+ else:
+ self.future.set_result(result)
+
+
+def _worker(executor_reference, work_queue, initializer, initargs):
+ if initializer is not None:
+ try:
+ initializer(*initargs)
+ except BaseException:
+ _base.LOGGER.critical('Exception in initializer:', exc_info=True)
+ executor = executor_reference()
+ if executor is not None:
+ executor._initializer_failed()
+ return
+ try:
+ while True:
+ work_item = work_queue.get(block=True)
+ if work_item is not None:
+ work_item.run()
+ # Delete references to object. See issue16284
+ del work_item
+
+ # attempt to increment idle count
+ executor = executor_reference()
+ if executor is not None:
+ executor._idle_semaphore.release()
+ del executor
+ continue
+
+ executor = executor_reference()
+ # Exit if:
+ # - The interpreter is shutting down OR
+ # - The executor that owns the worker has been collected OR
+ # - The executor that owns the worker has been shutdown.
+ if _shutdown or executor is None or executor._shutdown:
+ # Flag the executor as shutting down as early as possible if it
+ # is not gc-ed yet.
+ if executor is not None:
+ executor._shutdown = True
+ # Notice other workers
+ work_queue.put(None)
+ return
+ del executor
+ except BaseException:
+ _base.LOGGER.critical('Exception in worker', exc_info=True)
+
+
+class BrokenThreadPool(_base.BrokenExecutor):
+ """
+ Raised when a worker thread in a ThreadPoolExecutor failed initializing.
+ """
+
+
+class ThreadPoolExecutor(_base.Executor):
+
+ # Used to assign unique thread names when thread_name_prefix is not supplied.
+ _counter = itertools.count().__next__
+
+ def __init__(self, max_workers=None, thread_name_prefix='',
+ initializer=None, initargs=()):
+ """Initializes a new ThreadPoolExecutor instance.
+
+ Args:
+ max_workers: The maximum number of threads that can be used to
+ execute the given calls.
+ thread_name_prefix: An optional name prefix to give our threads.
+ initializer: A callable used to initialize worker threads.
+ initargs: A tuple of arguments to pass to the initializer.
+ """
+ if max_workers is None:
+ # ThreadPoolExecutor is often used to:
+ # * CPU bound task which releases GIL
+ # * I/O bound task (which releases GIL, of course)
+ #
+ # We use cpu_count + 4 for both types of tasks.
+ # But we limit it to 32 to avoid consuming surprisingly large resource
+ # on many core machine.
+ max_workers = min(32, (os.cpu_count() or 1) + 4)
+ if max_workers <= 0:
+ raise ValueError("max_workers must be greater than 0")
+
+ if initializer is not None and not callable(initializer):
+ raise TypeError("initializer must be a callable")
+
+ self._max_workers = max_workers
+ self._work_queue = queue.SimpleQueue()
+ self._idle_semaphore = threading.Semaphore(0)
+ self._threads = set()
+ self._broken = False
+ self._shutdown = False
+ self._shutdown_lock = threading.Lock()
+ self._thread_name_prefix = (thread_name_prefix or
+ ("ThreadPoolExecutor-%d" % self._counter()))
+ self._initializer = initializer
+ self._initargs = initargs
+
+ def submit(*args, **kwargs):
+ if len(args) >= 2:
+ self, fn, *args = args
+ elif not args:
+ raise TypeError("descriptor 'submit' of 'ThreadPoolExecutor' object "
+ "needs an argument")
+ elif 'fn' in kwargs:
+ fn = kwargs.pop('fn')
+ self, *args = args
+ import warnings
+ warnings.warn("Passing 'fn' as keyword argument is deprecated",
+ DeprecationWarning, stacklevel=2)
+ else:
+ raise TypeError('submit expected at least 1 positional argument, '
+ 'got %d' % (len(args)-1))
+
+ with self._shutdown_lock:
+ if self._broken:
+ raise BrokenThreadPool(self._broken)
+
+ if self._shutdown:
+ raise RuntimeError('cannot schedule new futures after shutdown')
+ if _shutdown:
+ raise RuntimeError('cannot schedule new futures after '
+ 'interpreter shutdown')
+
+ f = _base.Future()
+ w = _WorkItem(f, fn, args, kwargs)
+
+ self._work_queue.put(w)
+ self._adjust_thread_count()
+ return f
+ submit.__text_signature__ = _base.Executor.submit.__text_signature__
+ submit.__doc__ = _base.Executor.submit.__doc__
+
+ def _adjust_thread_count(self):
+ # if idle threads are available, don't spin new threads
+ if self._idle_semaphore.acquire(timeout=0):
+ return
+
+ # When the executor gets lost, the weakref callback will wake up
+ # the worker threads.
+ def weakref_cb(_, q=self._work_queue):
+ q.put(None)
+
+ num_threads = len(self._threads)
+ if num_threads < self._max_workers:
+ thread_name = '%s_%d' % (self._thread_name_prefix or self,
+ num_threads)
+ t = threading.Thread(name=thread_name, target=_worker,
+ args=(weakref.ref(self, weakref_cb),
+ self._work_queue,
+ self._initializer,
+ self._initargs))
+ t.daemon = True
+ t.start()
+ self._threads.add(t)
+ _threads_queues[t] = self._work_queue
+
+ def _initializer_failed(self):
+ with self._shutdown_lock:
+ self._broken = ('A thread initializer failed, the thread pool '
+ 'is not usable anymore')
+ # Drain work queue and mark pending futures failed
+ while True:
+ try:
+ work_item = self._work_queue.get_nowait()
+ except queue.Empty:
+ break
+ if work_item is not None:
+ work_item.future.set_exception(BrokenThreadPool(self._broken))
+
+ def shutdown(self, wait=True):
+ with self._shutdown_lock:
+ self._shutdown = True
+ self._work_queue.put(None)
+ if wait:
+ for t in self._threads:
+ t.join()
+ shutdown.__doc__ = _base.Executor.shutdown.__doc__
diff --git a/dist/lib/configparser.py b/dist/lib/configparser.py
new file mode 100644
index 0000000..924cc56
--- /dev/null
+++ b/dist/lib/configparser.py
@@ -0,0 +1,1363 @@
+"""Configuration file parser.
+
+A configuration file consists of sections, lead by a "[section]" header,
+and followed by "name: value" entries, with continuations and such in
+the style of RFC 822.
+
+Intrinsic defaults can be specified by passing them into the
+ConfigParser constructor as a dictionary.
+
+class:
+
+ConfigParser -- responsible for parsing a list of
+ configuration files, and managing the parsed database.
+
+ methods:
+
+ __init__(defaults=None, dict_type=_default_dict, allow_no_value=False,
+ delimiters=('=', ':'), comment_prefixes=('#', ';'),
+ inline_comment_prefixes=None, strict=True,
+ empty_lines_in_values=True, default_section='DEFAULT',
+ interpolation=, converters=):
+ Create the parser. When `defaults' is given, it is initialized into the
+ dictionary or intrinsic defaults. The keys must be strings, the values
+ must be appropriate for %()s string interpolation.
+
+ When `dict_type' is given, it will be used to create the dictionary
+ objects for the list of sections, for the options within a section, and
+ for the default values.
+
+ When `delimiters' is given, it will be used as the set of substrings
+ that divide keys from values.
+
+ When `comment_prefixes' is given, it will be used as the set of
+ substrings that prefix comments in empty lines. Comments can be
+ indented.
+
+ When `inline_comment_prefixes' is given, it will be used as the set of
+ substrings that prefix comments in non-empty lines.
+
+ When `strict` is True, the parser won't allow for any section or option
+ duplicates while reading from a single source (file, string or
+ dictionary). Default is True.
+
+ When `empty_lines_in_values' is False (default: True), each empty line
+ marks the end of an option. Otherwise, internal empty lines of
+ a multiline option are kept as part of the value.
+
+ When `allow_no_value' is True (default: False), options without
+ values are accepted; the value presented for these is None.
+
+ When `default_section' is given, the name of the special section is
+ named accordingly. By default it is called ``"DEFAULT"`` but this can
+ be customized to point to any other valid section name. Its current
+ value can be retrieved using the ``parser_instance.default_section``
+ attribute and may be modified at runtime.
+
+ When `interpolation` is given, it should be an Interpolation subclass
+ instance. It will be used as the handler for option value
+ pre-processing when using getters. RawConfigParser objects don't do
+ any sort of interpolation, whereas ConfigParser uses an instance of
+ BasicInterpolation. The library also provides a ``zc.buildbot``
+ inspired ExtendedInterpolation implementation.
+
+ When `converters` is given, it should be a dictionary where each key
+ represents the name of a type converter and each value is a callable
+ implementing the conversion from string to the desired datatype. Every
+ converter gets its corresponding get*() method on the parser object and
+ section proxies.
+
+ sections()
+ Return all the configuration section names, sans DEFAULT.
+
+ has_section(section)
+ Return whether the given section exists.
+
+ has_option(section, option)
+ Return whether the given option exists in the given section.
+
+ options(section)
+ Return list of configuration options for the named section.
+
+ read(filenames, encoding=None)
+ Read and parse the iterable of named configuration files, given by
+ name. A single filename is also allowed. Non-existing files
+ are ignored. Return list of successfully read files.
+
+ read_file(f, filename=None)
+ Read and parse one configuration file, given as a file object.
+ The filename defaults to f.name; it is only used in error
+ messages (if f has no `name' attribute, the string `??>' is used).
+
+ read_string(string)
+ Read configuration from a given string.
+
+ read_dict(dictionary)
+ Read configuration from a dictionary. Keys are section names,
+ values are dictionaries with keys and values that should be present
+ in the section. If the used dictionary type preserves order, sections
+ and their keys will be added in order. Values are automatically
+ converted to strings.
+
+ get(section, option, raw=False, vars=None, fallback=_UNSET)
+ Return a string value for the named option. All % interpolations are
+ expanded in the return values, based on the defaults passed into the
+ constructor and the DEFAULT section. Additional substitutions may be
+ provided using the `vars' argument, which must be a dictionary whose
+ contents override any pre-existing defaults. If `option' is a key in
+ `vars', the value from `vars' is used.
+
+ getint(section, options, raw=False, vars=None, fallback=_UNSET)
+ Like get(), but convert value to an integer.
+
+ getfloat(section, options, raw=False, vars=None, fallback=_UNSET)
+ Like get(), but convert value to a float.
+
+ getboolean(section, options, raw=False, vars=None, fallback=_UNSET)
+ Like get(), but convert value to a boolean (currently case
+ insensitively defined as 0, false, no, off for False, and 1, true,
+ yes, on for True). Returns False or True.
+
+ items(section=_UNSET, raw=False, vars=None)
+ If section is given, return a list of tuples with (name, value) for
+ each option in the section. Otherwise, return a list of tuples with
+ (section_name, section_proxy) for each section, including DEFAULTSECT.
+
+ remove_section(section)
+ Remove the given file section and all its options.
+
+ remove_option(section, option)
+ Remove the given option from the given section.
+
+ set(section, option, value)
+ Set the given option.
+
+ write(fp, space_around_delimiters=True)
+ Write the configuration state in .ini format. If
+ `space_around_delimiters' is True (the default), delimiters
+ between keys and values are surrounded by spaces.
+"""
+
+from collections.abc import MutableMapping
+from collections import ChainMap as _ChainMap
+import functools
+import io
+import itertools
+import os
+import re
+import sys
+import warnings
+
+__all__ = ["NoSectionError", "DuplicateOptionError", "DuplicateSectionError",
+ "NoOptionError", "InterpolationError", "InterpolationDepthError",
+ "InterpolationMissingOptionError", "InterpolationSyntaxError",
+ "ParsingError", "MissingSectionHeaderError",
+ "ConfigParser", "SafeConfigParser", "RawConfigParser",
+ "Interpolation", "BasicInterpolation", "ExtendedInterpolation",
+ "LegacyInterpolation", "SectionProxy", "ConverterMapping",
+ "DEFAULTSECT", "MAX_INTERPOLATION_DEPTH"]
+
+_default_dict = dict
+DEFAULTSECT = "DEFAULT"
+
+MAX_INTERPOLATION_DEPTH = 10
+
+
+
+# exception classes
+class Error(Exception):
+ """Base class for ConfigParser exceptions."""
+
+ def __init__(self, msg=''):
+ self.message = msg
+ Exception.__init__(self, msg)
+
+ def __repr__(self):
+ return self.message
+
+ __str__ = __repr__
+
+
+class NoSectionError(Error):
+ """Raised when no section matches a requested option."""
+
+ def __init__(self, section):
+ Error.__init__(self, 'No section: %r' % (section,))
+ self.section = section
+ self.args = (section, )
+
+
+class DuplicateSectionError(Error):
+ """Raised when a section is repeated in an input source.
+
+ Possible repetitions that raise this exception are: multiple creation
+ using the API or in strict parsers when a section is found more than once
+ in a single input file, string or dictionary.
+ """
+
+ def __init__(self, section, source=None, lineno=None):
+ msg = [repr(section), " already exists"]
+ if source is not None:
+ message = ["While reading from ", repr(source)]
+ if lineno is not None:
+ message.append(" [line {0:2d}]".format(lineno))
+ message.append(": section ")
+ message.extend(msg)
+ msg = message
+ else:
+ msg.insert(0, "Section ")
+ Error.__init__(self, "".join(msg))
+ self.section = section
+ self.source = source
+ self.lineno = lineno
+ self.args = (section, source, lineno)
+
+
+class DuplicateOptionError(Error):
+ """Raised by strict parsers when an option is repeated in an input source.
+
+ Current implementation raises this exception only when an option is found
+ more than once in a single file, string or dictionary.
+ """
+
+ def __init__(self, section, option, source=None, lineno=None):
+ msg = [repr(option), " in section ", repr(section),
+ " already exists"]
+ if source is not None:
+ message = ["While reading from ", repr(source)]
+ if lineno is not None:
+ message.append(" [line {0:2d}]".format(lineno))
+ message.append(": option ")
+ message.extend(msg)
+ msg = message
+ else:
+ msg.insert(0, "Option ")
+ Error.__init__(self, "".join(msg))
+ self.section = section
+ self.option = option
+ self.source = source
+ self.lineno = lineno
+ self.args = (section, option, source, lineno)
+
+
+class NoOptionError(Error):
+ """A requested option was not found."""
+
+ def __init__(self, option, section):
+ Error.__init__(self, "No option %r in section: %r" %
+ (option, section))
+ self.option = option
+ self.section = section
+ self.args = (option, section)
+
+
+class InterpolationError(Error):
+ """Base class for interpolation-related exceptions."""
+
+ def __init__(self, option, section, msg):
+ Error.__init__(self, msg)
+ self.option = option
+ self.section = section
+ self.args = (option, section, msg)
+
+
+class InterpolationMissingOptionError(InterpolationError):
+ """A string substitution required a setting which was not available."""
+
+ def __init__(self, option, section, rawval, reference):
+ msg = ("Bad value substitution: option {!r} in section {!r} contains "
+ "an interpolation key {!r} which is not a valid option name. "
+ "Raw value: {!r}".format(option, section, reference, rawval))
+ InterpolationError.__init__(self, option, section, msg)
+ self.reference = reference
+ self.args = (option, section, rawval, reference)
+
+
+class InterpolationSyntaxError(InterpolationError):
+ """Raised when the source text contains invalid syntax.
+
+ Current implementation raises this exception when the source text into
+ which substitutions are made does not conform to the required syntax.
+ """
+
+
+class InterpolationDepthError(InterpolationError):
+ """Raised when substitutions are nested too deeply."""
+
+ def __init__(self, option, section, rawval):
+ msg = ("Recursion limit exceeded in value substitution: option {!r} "
+ "in section {!r} contains an interpolation key which "
+ "cannot be substituted in {} steps. Raw value: {!r}"
+ "".format(option, section, MAX_INTERPOLATION_DEPTH,
+ rawval))
+ InterpolationError.__init__(self, option, section, msg)
+ self.args = (option, section, rawval)
+
+
+class ParsingError(Error):
+ """Raised when a configuration file does not follow legal syntax."""
+
+ def __init__(self, source=None, filename=None):
+ # Exactly one of `source'/`filename' arguments has to be given.
+ # `filename' kept for compatibility.
+ if filename and source:
+ raise ValueError("Cannot specify both `filename' and `source'. "
+ "Use `source'.")
+ elif not filename and not source:
+ raise ValueError("Required argument `source' not given.")
+ elif filename:
+ source = filename
+ Error.__init__(self, 'Source contains parsing errors: %r' % source)
+ self.source = source
+ self.errors = []
+ self.args = (source, )
+
+ @property
+ def filename(self):
+ """Deprecated, use `source'."""
+ warnings.warn(
+ "The 'filename' attribute will be removed in future versions. "
+ "Use 'source' instead.",
+ DeprecationWarning, stacklevel=2
+ )
+ return self.source
+
+ @filename.setter
+ def filename(self, value):
+ """Deprecated, user `source'."""
+ warnings.warn(
+ "The 'filename' attribute will be removed in future versions. "
+ "Use 'source' instead.",
+ DeprecationWarning, stacklevel=2
+ )
+ self.source = value
+
+ def append(self, lineno, line):
+ self.errors.append((lineno, line))
+ self.message += '\n\t[line %2d]: %s' % (lineno, line)
+
+
+class MissingSectionHeaderError(ParsingError):
+ """Raised when a key-value pair is found before any section header."""
+
+ def __init__(self, filename, lineno, line):
+ Error.__init__(
+ self,
+ 'File contains no section headers.\nfile: %r, line: %d\n%r' %
+ (filename, lineno, line))
+ self.source = filename
+ self.lineno = lineno
+ self.line = line
+ self.args = (filename, lineno, line)
+
+
+# Used in parser getters to indicate the default behaviour when a specific
+# option is not found it to raise an exception. Created to enable `None' as
+# a valid fallback value.
+_UNSET = object()
+
+
+class Interpolation:
+ """Dummy interpolation that passes the value through with no changes."""
+
+ def before_get(self, parser, section, option, value, defaults):
+ return value
+
+ def before_set(self, parser, section, option, value):
+ return value
+
+ def before_read(self, parser, section, option, value):
+ return value
+
+ def before_write(self, parser, section, option, value):
+ return value
+
+
+class BasicInterpolation(Interpolation):
+ """Interpolation as implemented in the classic ConfigParser.
+
+ The option values can contain format strings which refer to other values in
+ the same section, or values in the special default section.
+
+ For example:
+
+ something: %(dir)s/whatever
+
+ would resolve the "%(dir)s" to the value of dir. All reference
+ expansions are done late, on demand. If a user needs to use a bare % in
+ a configuration file, she can escape it by writing %%. Other % usage
+ is considered a user error and raises `InterpolationSyntaxError'."""
+
+ _KEYCRE = re.compile(r"%\(([^)]+)\)s")
+
+ def before_get(self, parser, section, option, value, defaults):
+ L = []
+ self._interpolate_some(parser, option, L, value, section, defaults, 1)
+ return ''.join(L)
+
+ def before_set(self, parser, section, option, value):
+ tmp_value = value.replace('%%', '') # escaped percent signs
+ tmp_value = self._KEYCRE.sub('', tmp_value) # valid syntax
+ if '%' in tmp_value:
+ raise ValueError("invalid interpolation syntax in %r at "
+ "position %d" % (value, tmp_value.find('%')))
+ return value
+
+ def _interpolate_some(self, parser, option, accum, rest, section, map,
+ depth):
+ rawval = parser.get(section, option, raw=True, fallback=rest)
+ if depth > MAX_INTERPOLATION_DEPTH:
+ raise InterpolationDepthError(option, section, rawval)
+ while rest:
+ p = rest.find("%")
+ if p < 0:
+ accum.append(rest)
+ return
+ if p > 0:
+ accum.append(rest[:p])
+ rest = rest[p:]
+ # p is no longer used
+ c = rest[1:2]
+ if c == "%":
+ accum.append("%")
+ rest = rest[2:]
+ elif c == "(":
+ m = self._KEYCRE.match(rest)
+ if m is None:
+ raise InterpolationSyntaxError(option, section,
+ "bad interpolation variable reference %r" % rest)
+ var = parser.optionxform(m.group(1))
+ rest = rest[m.end():]
+ try:
+ v = map[var]
+ except KeyError:
+ raise InterpolationMissingOptionError(
+ option, section, rawval, var) from None
+ if "%" in v:
+ self._interpolate_some(parser, option, accum, v,
+ section, map, depth + 1)
+ else:
+ accum.append(v)
+ else:
+ raise InterpolationSyntaxError(
+ option, section,
+ "'%%' must be followed by '%%' or '(', "
+ "found: %r" % (rest,))
+
+
+class ExtendedInterpolation(Interpolation):
+ """Advanced variant of interpolation, supports the syntax used by
+ `zc.buildout'. Enables interpolation between sections."""
+
+ _KEYCRE = re.compile(r"\$\{([^}]+)\}")
+
+ def before_get(self, parser, section, option, value, defaults):
+ L = []
+ self._interpolate_some(parser, option, L, value, section, defaults, 1)
+ return ''.join(L)
+
+ def before_set(self, parser, section, option, value):
+ tmp_value = value.replace('$$', '') # escaped dollar signs
+ tmp_value = self._KEYCRE.sub('', tmp_value) # valid syntax
+ if '$' in tmp_value:
+ raise ValueError("invalid interpolation syntax in %r at "
+ "position %d" % (value, tmp_value.find('$')))
+ return value
+
+ def _interpolate_some(self, parser, option, accum, rest, section, map,
+ depth):
+ rawval = parser.get(section, option, raw=True, fallback=rest)
+ if depth > MAX_INTERPOLATION_DEPTH:
+ raise InterpolationDepthError(option, section, rawval)
+ while rest:
+ p = rest.find("$")
+ if p < 0:
+ accum.append(rest)
+ return
+ if p > 0:
+ accum.append(rest[:p])
+ rest = rest[p:]
+ # p is no longer used
+ c = rest[1:2]
+ if c == "$":
+ accum.append("$")
+ rest = rest[2:]
+ elif c == "{":
+ m = self._KEYCRE.match(rest)
+ if m is None:
+ raise InterpolationSyntaxError(option, section,
+ "bad interpolation variable reference %r" % rest)
+ path = m.group(1).split(':')
+ rest = rest[m.end():]
+ sect = section
+ opt = option
+ try:
+ if len(path) == 1:
+ opt = parser.optionxform(path[0])
+ v = map[opt]
+ elif len(path) == 2:
+ sect = path[0]
+ opt = parser.optionxform(path[1])
+ v = parser.get(sect, opt, raw=True)
+ else:
+ raise InterpolationSyntaxError(
+ option, section,
+ "More than one ':' found: %r" % (rest,))
+ except (KeyError, NoSectionError, NoOptionError):
+ raise InterpolationMissingOptionError(
+ option, section, rawval, ":".join(path)) from None
+ if "$" in v:
+ self._interpolate_some(parser, opt, accum, v, sect,
+ dict(parser.items(sect, raw=True)),
+ depth + 1)
+ else:
+ accum.append(v)
+ else:
+ raise InterpolationSyntaxError(
+ option, section,
+ "'$' must be followed by '$' or '{', "
+ "found: %r" % (rest,))
+
+
+class LegacyInterpolation(Interpolation):
+ """Deprecated interpolation used in old versions of ConfigParser.
+ Use BasicInterpolation or ExtendedInterpolation instead."""
+
+ _KEYCRE = re.compile(r"%\(([^)]*)\)s|.")
+
+ def before_get(self, parser, section, option, value, vars):
+ rawval = value
+ depth = MAX_INTERPOLATION_DEPTH
+ while depth: # Loop through this until it's done
+ depth -= 1
+ if value and "%(" in value:
+ replace = functools.partial(self._interpolation_replace,
+ parser=parser)
+ value = self._KEYCRE.sub(replace, value)
+ try:
+ value = value % vars
+ except KeyError as e:
+ raise InterpolationMissingOptionError(
+ option, section, rawval, e.args[0]) from None
+ else:
+ break
+ if value and "%(" in value:
+ raise InterpolationDepthError(option, section, rawval)
+ return value
+
+ def before_set(self, parser, section, option, value):
+ return value
+
+ @staticmethod
+ def _interpolation_replace(match, parser):
+ s = match.group(1)
+ if s is None:
+ return match.group()
+ else:
+ return "%%(%s)s" % parser.optionxform(s)
+
+
+class RawConfigParser(MutableMapping):
+ """ConfigParser that does not do interpolation."""
+
+ # Regular expressions for parsing section headers and options
+ _SECT_TMPL = r"""
+ \[ # [
+ (?P[^]]+) # very permissive!
+ \] # ]
+ """
+ _OPT_TMPL = r"""
+ (?P.*?) # very permissive!
+ \s*(?P{delim})\s* # any number of space/tab,
+ # followed by any of the
+ # allowed delimiters,
+ # followed by any space/tab
+ (?P.*)$ # everything up to eol
+ """
+ _OPT_NV_TMPL = r"""
+ (?P.*?) # very permissive!
+ \s*(?: # any number of space/tab,
+ (?P{delim})\s* # optionally followed by
+ # any of the allowed
+ # delimiters, followed by any
+ # space/tab
+ (?P.*))?$ # everything up to eol
+ """
+ # Interpolation algorithm to be used if the user does not specify another
+ _DEFAULT_INTERPOLATION = Interpolation()
+ # Compiled regular expression for matching sections
+ SECTCRE = re.compile(_SECT_TMPL, re.VERBOSE)
+ # Compiled regular expression for matching options with typical separators
+ OPTCRE = re.compile(_OPT_TMPL.format(delim="=|:"), re.VERBOSE)
+ # Compiled regular expression for matching options with optional values
+ # delimited using typical separators
+ OPTCRE_NV = re.compile(_OPT_NV_TMPL.format(delim="=|:"), re.VERBOSE)
+ # Compiled regular expression for matching leading whitespace in a line
+ NONSPACECRE = re.compile(r"\S")
+ # Possible boolean values in the configuration.
+ BOOLEAN_STATES = {'1': True, 'yes': True, 'true': True, 'on': True,
+ '0': False, 'no': False, 'false': False, 'off': False}
+
+ def __init__(self, defaults=None, dict_type=_default_dict,
+ allow_no_value=False, *, delimiters=('=', ':'),
+ comment_prefixes=('#', ';'), inline_comment_prefixes=None,
+ strict=True, empty_lines_in_values=True,
+ default_section=DEFAULTSECT,
+ interpolation=_UNSET, converters=_UNSET):
+
+ self._dict = dict_type
+ self._sections = self._dict()
+ self._defaults = self._dict()
+ self._converters = ConverterMapping(self)
+ self._proxies = self._dict()
+ self._proxies[default_section] = SectionProxy(self, default_section)
+ self._delimiters = tuple(delimiters)
+ if delimiters == ('=', ':'):
+ self._optcre = self.OPTCRE_NV if allow_no_value else self.OPTCRE
+ else:
+ d = "|".join(re.escape(d) for d in delimiters)
+ if allow_no_value:
+ self._optcre = re.compile(self._OPT_NV_TMPL.format(delim=d),
+ re.VERBOSE)
+ else:
+ self._optcre = re.compile(self._OPT_TMPL.format(delim=d),
+ re.VERBOSE)
+ self._comment_prefixes = tuple(comment_prefixes or ())
+ self._inline_comment_prefixes = tuple(inline_comment_prefixes or ())
+ self._strict = strict
+ self._allow_no_value = allow_no_value
+ self._empty_lines_in_values = empty_lines_in_values
+ self.default_section=default_section
+ self._interpolation = interpolation
+ if self._interpolation is _UNSET:
+ self._interpolation = self._DEFAULT_INTERPOLATION
+ if self._interpolation is None:
+ self._interpolation = Interpolation()
+ if converters is not _UNSET:
+ self._converters.update(converters)
+ if defaults:
+ self._read_defaults(defaults)
+
+ def defaults(self):
+ return self._defaults
+
+ def sections(self):
+ """Return a list of section names, excluding [DEFAULT]"""
+ # self._sections will never have [DEFAULT] in it
+ return list(self._sections.keys())
+
+ def add_section(self, section):
+ """Create a new section in the configuration.
+
+ Raise DuplicateSectionError if a section by the specified name
+ already exists. Raise ValueError if name is DEFAULT.
+ """
+ if section == self.default_section:
+ raise ValueError('Invalid section name: %r' % section)
+
+ if section in self._sections:
+ raise DuplicateSectionError(section)
+ self._sections[section] = self._dict()
+ self._proxies[section] = SectionProxy(self, section)
+
+ def has_section(self, section):
+ """Indicate whether the named section is present in the configuration.
+
+ The DEFAULT section is not acknowledged.
+ """
+ return section in self._sections
+
+ def options(self, section):
+ """Return a list of option names for the given section name."""
+ try:
+ opts = self._sections[section].copy()
+ except KeyError:
+ raise NoSectionError(section) from None
+ opts.update(self._defaults)
+ return list(opts.keys())
+
+ def read(self, filenames, encoding=None):
+ """Read and parse a filename or an iterable of filenames.
+
+ Files that cannot be opened are silently ignored; this is
+ designed so that you can specify an iterable of potential
+ configuration file locations (e.g. current directory, user's
+ home directory, systemwide directory), and all existing
+ configuration files in the iterable will be read. A single
+ filename may also be given.
+
+ Return list of successfully read files.
+ """
+ if isinstance(filenames, (str, bytes, os.PathLike)):
+ filenames = [filenames]
+ read_ok = []
+ for filename in filenames:
+ try:
+ with open(filename, encoding=encoding) as fp:
+ self._read(fp, filename)
+ except OSError:
+ continue
+ if isinstance(filename, os.PathLike):
+ filename = os.fspath(filename)
+ read_ok.append(filename)
+ return read_ok
+
+ def read_file(self, f, source=None):
+ """Like read() but the argument must be a file-like object.
+
+ The `f' argument must be iterable, returning one line at a time.
+ Optional second argument is the `source' specifying the name of the
+ file being read. If not given, it is taken from f.name. If `f' has no
+ `name' attribute, `??>' is used.
+ """
+ if source is None:
+ try:
+ source = f.name
+ except AttributeError:
+ source = '??>'
+ self._read(f, source)
+
+ def read_string(self, string, source=''):
+ """Read configuration from a given string."""
+ sfile = io.StringIO(string)
+ self.read_file(sfile, source)
+
+ def read_dict(self, dictionary, source=''):
+ """Read configuration from a dictionary.
+
+ Keys are section names, values are dictionaries with keys and values
+ that should be present in the section. If the used dictionary type
+ preserves order, sections and their keys will be added in order.
+
+ All types held in the dictionary are converted to strings during
+ reading, including section names, option names and keys.
+
+ Optional second argument is the `source' specifying the name of the
+ dictionary being read.
+ """
+ elements_added = set()
+ for section, keys in dictionary.items():
+ section = str(section)
+ try:
+ self.add_section(section)
+ except (DuplicateSectionError, ValueError):
+ if self._strict and section in elements_added:
+ raise
+ elements_added.add(section)
+ for key, value in keys.items():
+ key = self.optionxform(str(key))
+ if value is not None:
+ value = str(value)
+ if self._strict and (section, key) in elements_added:
+ raise DuplicateOptionError(section, key, source)
+ elements_added.add((section, key))
+ self.set(section, key, value)
+
+ def readfp(self, fp, filename=None):
+ """Deprecated, use read_file instead."""
+ warnings.warn(
+ "This method will be removed in future versions. "
+ "Use 'parser.read_file()' instead.",
+ DeprecationWarning, stacklevel=2
+ )
+ self.read_file(fp, source=filename)
+
+ def get(self, section, option, *, raw=False, vars=None, fallback=_UNSET):
+ """Get an option value for a given section.
+
+ If `vars' is provided, it must be a dictionary. The option is looked up
+ in `vars' (if provided), `section', and in `DEFAULTSECT' in that order.
+ If the key is not found and `fallback' is provided, it is used as
+ a fallback value. `None' can be provided as a `fallback' value.
+
+ If interpolation is enabled and the optional argument `raw' is False,
+ all interpolations are expanded in the return values.
+
+ Arguments `raw', `vars', and `fallback' are keyword only.
+
+ The section DEFAULT is special.
+ """
+ try:
+ d = self._unify_values(section, vars)
+ except NoSectionError:
+ if fallback is _UNSET:
+ raise
+ else:
+ return fallback
+ option = self.optionxform(option)
+ try:
+ value = d[option]
+ except KeyError:
+ if fallback is _UNSET:
+ raise NoOptionError(option, section)
+ else:
+ return fallback
+
+ if raw or value is None:
+ return value
+ else:
+ return self._interpolation.before_get(self, section, option, value,
+ d)
+
+ def _get(self, section, conv, option, **kwargs):
+ return conv(self.get(section, option, **kwargs))
+
+ def _get_conv(self, section, option, conv, *, raw=False, vars=None,
+ fallback=_UNSET, **kwargs):
+ try:
+ return self._get(section, conv, option, raw=raw, vars=vars,
+ **kwargs)
+ except (NoSectionError, NoOptionError):
+ if fallback is _UNSET:
+ raise
+ return fallback
+
+ # getint, getfloat and getboolean provided directly for backwards compat
+ def getint(self, section, option, *, raw=False, vars=None,
+ fallback=_UNSET, **kwargs):
+ return self._get_conv(section, option, int, raw=raw, vars=vars,
+ fallback=fallback, **kwargs)
+
+ def getfloat(self, section, option, *, raw=False, vars=None,
+ fallback=_UNSET, **kwargs):
+ return self._get_conv(section, option, float, raw=raw, vars=vars,
+ fallback=fallback, **kwargs)
+
+ def getboolean(self, section, option, *, raw=False, vars=None,
+ fallback=_UNSET, **kwargs):
+ return self._get_conv(section, option, self._convert_to_boolean,
+ raw=raw, vars=vars, fallback=fallback, **kwargs)
+
+ def items(self, section=_UNSET, raw=False, vars=None):
+ """Return a list of (name, value) tuples for each option in a section.
+
+ All % interpolations are expanded in the return values, based on the
+ defaults passed into the constructor, unless the optional argument
+ `raw' is true. Additional substitutions may be provided using the
+ `vars' argument, which must be a dictionary whose contents overrides
+ any pre-existing defaults.
+
+ The section DEFAULT is special.
+ """
+ if section is _UNSET:
+ return super().items()
+ d = self._defaults.copy()
+ try:
+ d.update(self._sections[section])
+ except KeyError:
+ if section != self.default_section:
+ raise NoSectionError(section)
+ orig_keys = list(d.keys())
+ # Update with the entry specific variables
+ if vars:
+ for key, value in vars.items():
+ d[self.optionxform(key)] = value
+ value_getter = lambda option: self._interpolation.before_get(self,
+ section, option, d[option], d)
+ if raw:
+ value_getter = lambda option: d[option]
+ return [(option, value_getter(option)) for option in orig_keys]
+
+ def popitem(self):
+ """Remove a section from the parser and return it as
+ a (section_name, section_proxy) tuple. If no section is present, raise
+ KeyError.
+
+ The section DEFAULT is never returned because it cannot be removed.
+ """
+ for key in self.sections():
+ value = self[key]
+ del self[key]
+ return key, value
+ raise KeyError
+
+ def optionxform(self, optionstr):
+ return optionstr.lower()
+
+ def has_option(self, section, option):
+ """Check for the existence of a given option in a given section.
+ If the specified `section' is None or an empty string, DEFAULT is
+ assumed. If the specified `section' does not exist, returns False."""
+ if not section or section == self.default_section:
+ option = self.optionxform(option)
+ return option in self._defaults
+ elif section not in self._sections:
+ return False
+ else:
+ option = self.optionxform(option)
+ return (option in self._sections[section]
+ or option in self._defaults)
+
+ def set(self, section, option, value=None):
+ """Set an option."""
+ if value:
+ value = self._interpolation.before_set(self, section, option,
+ value)
+ if not section or section == self.default_section:
+ sectdict = self._defaults
+ else:
+ try:
+ sectdict = self._sections[section]
+ except KeyError:
+ raise NoSectionError(section) from None
+ sectdict[self.optionxform(option)] = value
+
+ def write(self, fp, space_around_delimiters=True):
+ """Write an .ini-format representation of the configuration state.
+
+ If `space_around_delimiters' is True (the default), delimiters
+ between keys and values are surrounded by spaces.
+ """
+ if space_around_delimiters:
+ d = " {} ".format(self._delimiters[0])
+ else:
+ d = self._delimiters[0]
+ if self._defaults:
+ self._write_section(fp, self.default_section,
+ self._defaults.items(), d)
+ for section in self._sections:
+ self._write_section(fp, section,
+ self._sections[section].items(), d)
+
+ def _write_section(self, fp, section_name, section_items, delimiter):
+ """Write a single section to the specified `fp'."""
+ fp.write("[{}]\n".format(section_name))
+ for key, value in section_items:
+ value = self._interpolation.before_write(self, section_name, key,
+ value)
+ if value is not None or not self._allow_no_value:
+ value = delimiter + str(value).replace('\n', '\n\t')
+ else:
+ value = ""
+ fp.write("{}{}\n".format(key, value))
+ fp.write("\n")
+
+ def remove_option(self, section, option):
+ """Remove an option."""
+ if not section or section == self.default_section:
+ sectdict = self._defaults
+ else:
+ try:
+ sectdict = self._sections[section]
+ except KeyError:
+ raise NoSectionError(section) from None
+ option = self.optionxform(option)
+ existed = option in sectdict
+ if existed:
+ del sectdict[option]
+ return existed
+
+ def remove_section(self, section):
+ """Remove a file section."""
+ existed = section in self._sections
+ if existed:
+ del self._sections[section]
+ del self._proxies[section]
+ return existed
+
+ def __getitem__(self, key):
+ if key != self.default_section and not self.has_section(key):
+ raise KeyError(key)
+ return self._proxies[key]
+
+ def __setitem__(self, key, value):
+ # To conform with the mapping protocol, overwrites existing values in
+ # the section.
+ if key in self and self[key] is value:
+ return
+ # XXX this is not atomic if read_dict fails at any point. Then again,
+ # no update method in configparser is atomic in this implementation.
+ if key == self.default_section:
+ self._defaults.clear()
+ elif key in self._sections:
+ self._sections[key].clear()
+ self.read_dict({key: value})
+
+ def __delitem__(self, key):
+ if key == self.default_section:
+ raise ValueError("Cannot remove the default section.")
+ if not self.has_section(key):
+ raise KeyError(key)
+ self.remove_section(key)
+
+ def __contains__(self, key):
+ return key == self.default_section or self.has_section(key)
+
+ def __len__(self):
+ return len(self._sections) + 1 # the default section
+
+ def __iter__(self):
+ # XXX does it break when underlying container state changed?
+ return itertools.chain((self.default_section,), self._sections.keys())
+
+ def _read(self, fp, fpname):
+ """Parse a sectioned configuration file.
+
+ Each section in a configuration file contains a header, indicated by
+ a name in square brackets (`[]'), plus key/value options, indicated by
+ `name' and `value' delimited with a specific substring (`=' or `:' by
+ default).
+
+ Values can span multiple lines, as long as they are indented deeper
+ than the first line of the value. Depending on the parser's mode, blank
+ lines may be treated as parts of multiline values or ignored.
+
+ Configuration files may include comments, prefixed by specific
+ characters (`#' and `;' by default). Comments may appear on their own
+ in an otherwise empty line or may be entered in lines holding values or
+ section names.
+ """
+ elements_added = set()
+ cursect = None # None, or a dictionary
+ sectname = None
+ optname = None
+ lineno = 0
+ indent_level = 0
+ e = None # None, or an exception
+ for lineno, line in enumerate(fp, start=1):
+ comment_start = sys.maxsize
+ # strip inline comments
+ inline_prefixes = {p: -1 for p in self._inline_comment_prefixes}
+ while comment_start == sys.maxsize and inline_prefixes:
+ next_prefixes = {}
+ for prefix, index in inline_prefixes.items():
+ index = line.find(prefix, index+1)
+ if index == -1:
+ continue
+ next_prefixes[prefix] = index
+ if index == 0 or (index > 0 and line[index-1].isspace()):
+ comment_start = min(comment_start, index)
+ inline_prefixes = next_prefixes
+ # strip full line comments
+ for prefix in self._comment_prefixes:
+ if line.strip().startswith(prefix):
+ comment_start = 0
+ break
+ if comment_start == sys.maxsize:
+ comment_start = None
+ value = line[:comment_start].strip()
+ if not value:
+ if self._empty_lines_in_values:
+ # add empty line to the value, but only if there was no
+ # comment on the line
+ if (comment_start is None and
+ cursect is not None and
+ optname and
+ cursect[optname] is not None):
+ cursect[optname].append('') # newlines added at join
+ else:
+ # empty line marks end of value
+ indent_level = sys.maxsize
+ continue
+ # continuation line?
+ first_nonspace = self.NONSPACECRE.search(line)
+ cur_indent_level = first_nonspace.start() if first_nonspace else 0
+ if (cursect is not None and optname and
+ cur_indent_level > indent_level):
+ cursect[optname].append(value)
+ # a section header or option header?
+ else:
+ indent_level = cur_indent_level
+ # is it a section header?
+ mo = self.SECTCRE.match(value)
+ if mo:
+ sectname = mo.group('header')
+ if sectname in self._sections:
+ if self._strict and sectname in elements_added:
+ raise DuplicateSectionError(sectname, fpname,
+ lineno)
+ cursect = self._sections[sectname]
+ elements_added.add(sectname)
+ elif sectname == self.default_section:
+ cursect = self._defaults
+ else:
+ cursect = self._dict()
+ self._sections[sectname] = cursect
+ self._proxies[sectname] = SectionProxy(self, sectname)
+ elements_added.add(sectname)
+ # So sections can't start with a continuation line
+ optname = None
+ # no section header in the file?
+ elif cursect is None:
+ raise MissingSectionHeaderError(fpname, lineno, line)
+ # an option line?
+ else:
+ mo = self._optcre.match(value)
+ if mo:
+ optname, vi, optval = mo.group('option', 'vi', 'value')
+ if not optname:
+ e = self._handle_error(e, fpname, lineno, line)
+ optname = self.optionxform(optname.rstrip())
+ if (self._strict and
+ (sectname, optname) in elements_added):
+ raise DuplicateOptionError(sectname, optname,
+ fpname, lineno)
+ elements_added.add((sectname, optname))
+ # This check is fine because the OPTCRE cannot
+ # match if it would set optval to None
+ if optval is not None:
+ optval = optval.strip()
+ cursect[optname] = [optval]
+ else:
+ # valueless option handling
+ cursect[optname] = None
+ else:
+ # a non-fatal parsing error occurred. set up the
+ # exception but keep going. the exception will be
+ # raised at the end of the file and will contain a
+ # list of all bogus lines
+ e = self._handle_error(e, fpname, lineno, line)
+ self._join_multiline_values()
+ # if any parsing errors occurred, raise an exception
+ if e:
+ raise e
+
+ def _join_multiline_values(self):
+ defaults = self.default_section, self._defaults
+ all_sections = itertools.chain((defaults,),
+ self._sections.items())
+ for section, options in all_sections:
+ for name, val in options.items():
+ if isinstance(val, list):
+ val = '\n'.join(val).rstrip()
+ options[name] = self._interpolation.before_read(self,
+ section,
+ name, val)
+
+ def _read_defaults(self, defaults):
+ """Read the defaults passed in the initializer.
+ Note: values can be non-string."""
+ for key, value in defaults.items():
+ self._defaults[self.optionxform(key)] = value
+
+ def _handle_error(self, exc, fpname, lineno, line):
+ if not exc:
+ exc = ParsingError(fpname)
+ exc.append(lineno, repr(line))
+ return exc
+
+ def _unify_values(self, section, vars):
+ """Create a sequence of lookups with 'vars' taking priority over
+ the 'section' which takes priority over the DEFAULTSECT.
+
+ """
+ sectiondict = {}
+ try:
+ sectiondict = self._sections[section]
+ except KeyError:
+ if section != self.default_section:
+ raise NoSectionError(section) from None
+ # Update with the entry specific variables
+ vardict = {}
+ if vars:
+ for key, value in vars.items():
+ if value is not None:
+ value = str(value)
+ vardict[self.optionxform(key)] = value
+ return _ChainMap(vardict, sectiondict, self._defaults)
+
+ def _convert_to_boolean(self, value):
+ """Return a boolean value translating from other types if necessary.
+ """
+ if value.lower() not in self.BOOLEAN_STATES:
+ raise ValueError('Not a boolean: %s' % value)
+ return self.BOOLEAN_STATES[value.lower()]
+
+ def _validate_value_types(self, *, section="", option="", value=""):
+ """Raises a TypeError for non-string values.
+
+ The only legal non-string value if we allow valueless
+ options is None, so we need to check if the value is a
+ string if:
+ - we do not allow valueless options, or
+ - we allow valueless options but the value is not None
+
+ For compatibility reasons this method is not used in classic set()
+ for RawConfigParsers. It is invoked in every case for mapping protocol
+ access and in ConfigParser.set().
+ """
+ if not isinstance(section, str):
+ raise TypeError("section names must be strings")
+ if not isinstance(option, str):
+ raise TypeError("option keys must be strings")
+ if not self._allow_no_value or value:
+ if not isinstance(value, str):
+ raise TypeError("option values must be strings")
+
+ @property
+ def converters(self):
+ return self._converters
+
+
+class ConfigParser(RawConfigParser):
+ """ConfigParser implementing interpolation."""
+
+ _DEFAULT_INTERPOLATION = BasicInterpolation()
+
+ def set(self, section, option, value=None):
+ """Set an option. Extends RawConfigParser.set by validating type and
+ interpolation syntax on the value."""
+ self._validate_value_types(option=option, value=value)
+ super().set(section, option, value)
+
+ def add_section(self, section):
+ """Create a new section in the configuration. Extends
+ RawConfigParser.add_section by validating if the section name is
+ a string."""
+ self._validate_value_types(section=section)
+ super().add_section(section)
+
+ def _read_defaults(self, defaults):
+ """Reads the defaults passed in the initializer, implicitly converting
+ values to strings like the rest of the API.
+
+ Does not perform interpolation for backwards compatibility.
+ """
+ try:
+ hold_interpolation = self._interpolation
+ self._interpolation = Interpolation()
+ self.read_dict({self.default_section: defaults})
+ finally:
+ self._interpolation = hold_interpolation
+
+
+class SafeConfigParser(ConfigParser):
+ """ConfigParser alias for backwards compatibility purposes."""
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ warnings.warn(
+ "The SafeConfigParser class has been renamed to ConfigParser "
+ "in Python 3.2. This alias will be removed in future versions."
+ " Use ConfigParser directly instead.",
+ DeprecationWarning, stacklevel=2
+ )
+
+
+class SectionProxy(MutableMapping):
+ """A proxy for a single section from a parser."""
+
+ def __init__(self, parser, name):
+ """Creates a view on a section of the specified `name` in `parser`."""
+ self._parser = parser
+ self._name = name
+ for conv in parser.converters:
+ key = 'get' + conv
+ getter = functools.partial(self.get, _impl=getattr(parser, key))
+ setattr(self, key, getter)
+
+ def __repr__(self):
+ return ''.format(self._name)
+
+ def __getitem__(self, key):
+ if not self._parser.has_option(self._name, key):
+ raise KeyError(key)
+ return self._parser.get(self._name, key)
+
+ def __setitem__(self, key, value):
+ self._parser._validate_value_types(option=key, value=value)
+ return self._parser.set(self._name, key, value)
+
+ def __delitem__(self, key):
+ if not (self._parser.has_option(self._name, key) and
+ self._parser.remove_option(self._name, key)):
+ raise KeyError(key)
+
+ def __contains__(self, key):
+ return self._parser.has_option(self._name, key)
+
+ def __len__(self):
+ return len(self._options())
+
+ def __iter__(self):
+ return self._options().__iter__()
+
+ def _options(self):
+ if self._name != self._parser.default_section:
+ return self._parser.options(self._name)
+ else:
+ return self._parser.defaults()
+
+ @property
+ def parser(self):
+ # The parser object of the proxy is read-only.
+ return self._parser
+
+ @property
+ def name(self):
+ # The name of the section on a proxy is read-only.
+ return self._name
+
+ def get(self, option, fallback=None, *, raw=False, vars=None,
+ _impl=None, **kwargs):
+ """Get an option value.
+
+ Unless `fallback` is provided, `None` will be returned if the option
+ is not found.
+
+ """
+ # If `_impl` is provided, it should be a getter method on the parser
+ # object that provides the desired type conversion.
+ if not _impl:
+ _impl = self._parser.get
+ return _impl(self._name, option, raw=raw, vars=vars,
+ fallback=fallback, **kwargs)
+
+
+class ConverterMapping(MutableMapping):
+ """Enables reuse of get*() methods between the parser and section proxies.
+
+ If a parser class implements a getter directly, the value for the given
+ key will be ``None``. The presence of the converter name here enables
+ section proxies to find and use the implementation on the parser class.
+ """
+
+ GETTERCRE = re.compile(r"^get(?P.+)$")
+
+ def __init__(self, parser):
+ self._parser = parser
+ self._data = {}
+ for getter in dir(self._parser):
+ m = self.GETTERCRE.match(getter)
+ if not m or not callable(getattr(self._parser, getter)):
+ continue
+ self._data[m.group('name')] = None # See class docstring.
+
+ def __getitem__(self, key):
+ return self._data[key]
+
+ def __setitem__(self, key, value):
+ try:
+ k = 'get' + key
+ except TypeError:
+ raise ValueError('Incompatible key: {} (type: {})'
+ ''.format(key, type(key)))
+ if k == 'get':
+ raise ValueError('Incompatible key: cannot use "" as a name')
+ self._data[key] = value
+ func = functools.partial(self._parser._get_conv, conv=value)
+ func.converter = value
+ setattr(self._parser, k, func)
+ for proxy in self._parser.values():
+ getter = functools.partial(proxy.get, _impl=func)
+ setattr(proxy, k, getter)
+
+ def __delitem__(self, key):
+ try:
+ k = 'get' + (key or None)
+ except TypeError:
+ raise KeyError(key)
+ del self._data[key]
+ for inst in itertools.chain((self._parser,), self._parser.values()):
+ try:
+ delattr(inst, k)
+ except AttributeError:
+ # don't raise since the entry was present in _data, silently
+ # clean up
+ continue
+
+ def __iter__(self):
+ return iter(self._data)
+
+ def __len__(self):
+ return len(self._data)
diff --git a/dist/lib/contextlib.py b/dist/lib/contextlib.py
new file mode 100644
index 0000000..94dc2bf
--- /dev/null
+++ b/dist/lib/contextlib.py
@@ -0,0 +1,704 @@
+"""Utilities for with-statement contexts. See PEP 343."""
+import abc
+import sys
+import _collections_abc
+from collections import deque
+from functools import wraps
+from types import MethodType
+
+__all__ = ["asynccontextmanager", "contextmanager", "closing", "nullcontext",
+ "AbstractContextManager", "AbstractAsyncContextManager",
+ "AsyncExitStack", "ContextDecorator", "ExitStack",
+ "redirect_stdout", "redirect_stderr", "suppress"]
+
+
+class AbstractContextManager(abc.ABC):
+
+ """An abstract base class for context managers."""
+
+ def __enter__(self):
+ """Return `self` upon entering the runtime context."""
+ return self
+
+ @abc.abstractmethod
+ def __exit__(self, exc_type, exc_value, traceback):
+ """Raise any exception triggered within the runtime context."""
+ return None
+
+ @classmethod
+ def __subclasshook__(cls, C):
+ if cls is AbstractContextManager:
+ return _collections_abc._check_methods(C, "__enter__", "__exit__")
+ return NotImplemented
+
+
+class AbstractAsyncContextManager(abc.ABC):
+
+ """An abstract base class for asynchronous context managers."""
+
+ async def __aenter__(self):
+ """Return `self` upon entering the runtime context."""
+ return self
+
+ @abc.abstractmethod
+ async def __aexit__(self, exc_type, exc_value, traceback):
+ """Raise any exception triggered within the runtime context."""
+ return None
+
+ @classmethod
+ def __subclasshook__(cls, C):
+ if cls is AbstractAsyncContextManager:
+ return _collections_abc._check_methods(C, "__aenter__",
+ "__aexit__")
+ return NotImplemented
+
+
+class ContextDecorator(object):
+ "A base class or mixin that enables context managers to work as decorators."
+
+ def _recreate_cm(self):
+ """Return a recreated instance of self.
+
+ Allows an otherwise one-shot context manager like
+ _GeneratorContextManager to support use as
+ a decorator via implicit recreation.
+
+ This is a private interface just for _GeneratorContextManager.
+ See issue #11647 for details.
+ """
+ return self
+
+ def __call__(self, func):
+ @wraps(func)
+ def inner(*args, **kwds):
+ with self._recreate_cm():
+ return func(*args, **kwds)
+ return inner
+
+
+class _GeneratorContextManagerBase:
+ """Shared functionality for @contextmanager and @asynccontextmanager."""
+
+ def __init__(self, func, args, kwds):
+ self.gen = func(*args, **kwds)
+ self.func, self.args, self.kwds = func, args, kwds
+ # Issue 19330: ensure context manager instances have good docstrings
+ doc = getattr(func, "__doc__", None)
+ if doc is None:
+ doc = type(self).__doc__
+ self.__doc__ = doc
+ # Unfortunately, this still doesn't provide good help output when
+ # inspecting the created context manager instances, since pydoc
+ # currently bypasses the instance docstring and shows the docstring
+ # for the class instead.
+ # See http://bugs.python.org/issue19404 for more details.
+
+
+class _GeneratorContextManager(_GeneratorContextManagerBase,
+ AbstractContextManager,
+ ContextDecorator):
+ """Helper for @contextmanager decorator."""
+
+ def _recreate_cm(self):
+ # _GCM instances are one-shot context managers, so the
+ # CM must be recreated each time a decorated function is
+ # called
+ return self.__class__(self.func, self.args, self.kwds)
+
+ def __enter__(self):
+ # do not keep args and kwds alive unnecessarily
+ # they are only needed for recreation, which is not possible anymore
+ del self.args, self.kwds, self.func
+ try:
+ return next(self.gen)
+ except StopIteration:
+ raise RuntimeError("generator didn't yield") from None
+
+ def __exit__(self, type, value, traceback):
+ if type is None:
+ try:
+ next(self.gen)
+ except StopIteration:
+ return False
+ else:
+ raise RuntimeError("generator didn't stop")
+ else:
+ if value is None:
+ # Need to force instantiation so we can reliably
+ # tell if we get the same exception back
+ value = type()
+ try:
+ self.gen.throw(type, value, traceback)
+ except StopIteration as exc:
+ # Suppress StopIteration *unless* it's the same exception that
+ # was passed to throw(). This prevents a StopIteration
+ # raised inside the "with" statement from being suppressed.
+ return exc is not value
+ except RuntimeError as exc:
+ # Don't re-raise the passed in exception. (issue27122)
+ if exc is value:
+ return False
+ # Likewise, avoid suppressing if a StopIteration exception
+ # was passed to throw() and later wrapped into a RuntimeError
+ # (see PEP 479).
+ if type is StopIteration and exc.__cause__ is value:
+ return False
+ raise
+ except:
+ # only re-raise if it's *not* the exception that was
+ # passed to throw(), because __exit__() must not raise
+ # an exception unless __exit__() itself failed. But throw()
+ # has to raise the exception to signal propagation, so this
+ # fixes the impedance mismatch between the throw() protocol
+ # and the __exit__() protocol.
+ #
+ # This cannot use 'except BaseException as exc' (as in the
+ # async implementation) to maintain compatibility with
+ # Python 2, where old-style class exceptions are not caught
+ # by 'except BaseException'.
+ if sys.exc_info()[1] is value:
+ return False
+ raise
+ raise RuntimeError("generator didn't stop after throw()")
+
+
+class _AsyncGeneratorContextManager(_GeneratorContextManagerBase,
+ AbstractAsyncContextManager):
+ """Helper for @asynccontextmanager."""
+
+ async def __aenter__(self):
+ try:
+ return await self.gen.__anext__()
+ except StopAsyncIteration:
+ raise RuntimeError("generator didn't yield") from None
+
+ async def __aexit__(self, typ, value, traceback):
+ if typ is None:
+ try:
+ await self.gen.__anext__()
+ except StopAsyncIteration:
+ return
+ else:
+ raise RuntimeError("generator didn't stop")
+ else:
+ if value is None:
+ value = typ()
+ # See _GeneratorContextManager.__exit__ for comments on subtleties
+ # in this implementation
+ try:
+ await self.gen.athrow(typ, value, traceback)
+ raise RuntimeError("generator didn't stop after athrow()")
+ except StopAsyncIteration as exc:
+ return exc is not value
+ except RuntimeError as exc:
+ if exc is value:
+ return False
+ # Avoid suppressing if a StopIteration exception
+ # was passed to throw() and later wrapped into a RuntimeError
+ # (see PEP 479 for sync generators; async generators also
+ # have this behavior). But do this only if the exception wrapped
+ # by the RuntimeError is actully Stop(Async)Iteration (see
+ # issue29692).
+ if isinstance(value, (StopIteration, StopAsyncIteration)):
+ if exc.__cause__ is value:
+ return False
+ raise
+ except BaseException as exc:
+ if exc is not value:
+ raise
+
+
+def contextmanager(func):
+ """@contextmanager decorator.
+
+ Typical usage:
+
+ @contextmanager
+ def some_generator():
+
+ try:
+ yield
+ finally:
+
+
+ This makes this:
+
+ with some_generator() as :
+
+
+ equivalent to this:
+
+
+ try:
+ =
+
+ finally:
+
+ """
+ @wraps(func)
+ def helper(*args, **kwds):
+ return _GeneratorContextManager(func, args, kwds)
+ return helper
+
+
+def asynccontextmanager(func):
+ """@asynccontextmanager decorator.
+
+ Typical usage:
+
+ @asynccontextmanager
+ async def some_async_generator():
+
+ try:
+ yield
+ finally:
+
+
+ This makes this:
+
+ async with some_async_generator() as :
+
+
+ equivalent to this:
+
+
+ try:
+ =
+
+ finally:
+
+ """
+ @wraps(func)
+ def helper(*args, **kwds):
+ return _AsyncGeneratorContextManager(func, args, kwds)
+ return helper
+
+
+class closing(AbstractContextManager):
+ """Context to automatically close something at the end of a block.
+
+ Code like this:
+
+ with closing(.open()) as f:
+
+
+ is equivalent to this:
+
+ f = .open()
+ try:
+
+ finally:
+ f.close()
+
+ """
+ def __init__(self, thing):
+ self.thing = thing
+ def __enter__(self):
+ return self.thing
+ def __exit__(self, *exc_info):
+ self.thing.close()
+
+
+class _RedirectStream(AbstractContextManager):
+
+ _stream = None
+
+ def __init__(self, new_target):
+ self._new_target = new_target
+ # We use a list of old targets to make this CM re-entrant
+ self._old_targets = []
+
+ def __enter__(self):
+ self._old_targets.append(getattr(sys, self._stream))
+ setattr(sys, self._stream, self._new_target)
+ return self._new_target
+
+ def __exit__(self, exctype, excinst, exctb):
+ setattr(sys, self._stream, self._old_targets.pop())
+
+
+class redirect_stdout(_RedirectStream):
+ """Context manager for temporarily redirecting stdout to another file.
+
+ # How to send help() to stderr
+ with redirect_stdout(sys.stderr):
+ help(dir)
+
+ # How to write help() to a file
+ with open('help.txt', 'w') as f:
+ with redirect_stdout(f):
+ help(pow)
+ """
+
+ _stream = "stdout"
+
+
+class redirect_stderr(_RedirectStream):
+ """Context manager for temporarily redirecting stderr to another file."""
+
+ _stream = "stderr"
+
+
+class suppress(AbstractContextManager):
+ """Context manager to suppress specified exceptions
+
+ After the exception is suppressed, execution proceeds with the next
+ statement following the with statement.
+
+ with suppress(FileNotFoundError):
+ os.remove(somefile)
+ # Execution still resumes here if the file was already removed
+ """
+
+ def __init__(self, *exceptions):
+ self._exceptions = exceptions
+
+ def __enter__(self):
+ pass
+
+ def __exit__(self, exctype, excinst, exctb):
+ # Unlike isinstance and issubclass, CPython exception handling
+ # currently only looks at the concrete type hierarchy (ignoring
+ # the instance and subclass checking hooks). While Guido considers
+ # that a bug rather than a feature, it's a fairly hard one to fix
+ # due to various internal implementation details. suppress provides
+ # the simpler issubclass based semantics, rather than trying to
+ # exactly reproduce the limitations of the CPython interpreter.
+ #
+ # See http://bugs.python.org/issue12029 for more details
+ return exctype is not None and issubclass(exctype, self._exceptions)
+
+
+class _BaseExitStack:
+ """A base class for ExitStack and AsyncExitStack."""
+
+ @staticmethod
+ def _create_exit_wrapper(cm, cm_exit):
+ return MethodType(cm_exit, cm)
+
+ @staticmethod
+ def _create_cb_wrapper(callback, /, *args, **kwds):
+ def _exit_wrapper(exc_type, exc, tb):
+ callback(*args, **kwds)
+ return _exit_wrapper
+
+ def __init__(self):
+ self._exit_callbacks = deque()
+
+ def pop_all(self):
+ """Preserve the context stack by transferring it to a new instance."""
+ new_stack = type(self)()
+ new_stack._exit_callbacks = self._exit_callbacks
+ self._exit_callbacks = deque()
+ return new_stack
+
+ def push(self, exit):
+ """Registers a callback with the standard __exit__ method signature.
+
+ Can suppress exceptions the same way __exit__ method can.
+ Also accepts any object with an __exit__ method (registering a call
+ to the method instead of the object itself).
+ """
+ # We use an unbound method rather than a bound method to follow
+ # the standard lookup behaviour for special methods.
+ _cb_type = type(exit)
+
+ try:
+ exit_method = _cb_type.__exit__
+ except AttributeError:
+ # Not a context manager, so assume it's a callable.
+ self._push_exit_callback(exit)
+ else:
+ self._push_cm_exit(exit, exit_method)
+ return exit # Allow use as a decorator.
+
+ def enter_context(self, cm):
+ """Enters the supplied context manager.
+
+ If successful, also pushes its __exit__ method as a callback and
+ returns the result of the __enter__ method.
+ """
+ # We look up the special methods on the type to match the with
+ # statement.
+ _cm_type = type(cm)
+ _exit = _cm_type.__exit__
+ result = _cm_type.__enter__(cm)
+ self._push_cm_exit(cm, _exit)
+ return result
+
+ def callback(*args, **kwds):
+ """Registers an arbitrary callback and arguments.
+
+ Cannot suppress exceptions.
+ """
+ if len(args) >= 2:
+ self, callback, *args = args
+ elif not args:
+ raise TypeError("descriptor 'callback' of '_BaseExitStack' object "
+ "needs an argument")
+ elif 'callback' in kwds:
+ callback = kwds.pop('callback')
+ self, *args = args
+ import warnings
+ warnings.warn("Passing 'callback' as keyword argument is deprecated",
+ DeprecationWarning, stacklevel=2)
+ else:
+ raise TypeError('callback expected at least 1 positional argument, '
+ 'got %d' % (len(args)-1))
+
+ _exit_wrapper = self._create_cb_wrapper(callback, *args, **kwds)
+
+ # We changed the signature, so using @wraps is not appropriate, but
+ # setting __wrapped__ may still help with introspection.
+ _exit_wrapper.__wrapped__ = callback
+ self._push_exit_callback(_exit_wrapper)
+ return callback # Allow use as a decorator
+ callback.__text_signature__ = '($self, callback, /, *args, **kwds)'
+
+ def _push_cm_exit(self, cm, cm_exit):
+ """Helper to correctly register callbacks to __exit__ methods."""
+ _exit_wrapper = self._create_exit_wrapper(cm, cm_exit)
+ self._push_exit_callback(_exit_wrapper, True)
+
+ def _push_exit_callback(self, callback, is_sync=True):
+ self._exit_callbacks.append((is_sync, callback))
+
+
+# Inspired by discussions on http://bugs.python.org/issue13585
+class ExitStack(_BaseExitStack, AbstractContextManager):
+ """Context manager for dynamic management of a stack of exit callbacks.
+
+ For example:
+ with ExitStack() as stack:
+ files = [stack.enter_context(open(fname)) for fname in filenames]
+ # All opened files will automatically be closed at the end of
+ # the with statement, even if attempts to open files later
+ # in the list raise an exception.
+ """
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *exc_details):
+ received_exc = exc_details[0] is not None
+
+ # We manipulate the exception state so it behaves as though
+ # we were actually nesting multiple with statements
+ frame_exc = sys.exc_info()[1]
+ def _fix_exception_context(new_exc, old_exc):
+ # Context may not be correct, so find the end of the chain
+ while 1:
+ exc_context = new_exc.__context__
+ if exc_context is old_exc:
+ # Context is already set correctly (see issue 20317)
+ return
+ if exc_context is None or exc_context is frame_exc:
+ break
+ new_exc = exc_context
+ # Change the end of the chain to point to the exception
+ # we expect it to reference
+ new_exc.__context__ = old_exc
+
+ # Callbacks are invoked in LIFO order to match the behaviour of
+ # nested context managers
+ suppressed_exc = False
+ pending_raise = False
+ while self._exit_callbacks:
+ is_sync, cb = self._exit_callbacks.pop()
+ assert is_sync
+ try:
+ if cb(*exc_details):
+ suppressed_exc = True
+ pending_raise = False
+ exc_details = (None, None, None)
+ except:
+ new_exc_details = sys.exc_info()
+ # simulate the stack of exceptions by setting the context
+ _fix_exception_context(new_exc_details[1], exc_details[1])
+ pending_raise = True
+ exc_details = new_exc_details
+ if pending_raise:
+ try:
+ # bare "raise exc_details[1]" replaces our carefully
+ # set-up context
+ fixed_ctx = exc_details[1].__context__
+ raise exc_details[1]
+ except BaseException:
+ exc_details[1].__context__ = fixed_ctx
+ raise
+ return received_exc and suppressed_exc
+
+ def close(self):
+ """Immediately unwind the context stack."""
+ self.__exit__(None, None, None)
+
+
+# Inspired by discussions on https://bugs.python.org/issue29302
+class AsyncExitStack(_BaseExitStack, AbstractAsyncContextManager):
+ """Async context manager for dynamic management of a stack of exit
+ callbacks.
+
+ For example:
+ async with AsyncExitStack() as stack:
+ connections = [await stack.enter_async_context(get_connection())
+ for i in range(5)]
+ # All opened connections will automatically be released at the
+ # end of the async with statement, even if attempts to open a
+ # connection later in the list raise an exception.
+ """
+
+ @staticmethod
+ def _create_async_exit_wrapper(cm, cm_exit):
+ return MethodType(cm_exit, cm)
+
+ @staticmethod
+ def _create_async_cb_wrapper(callback, /, *args, **kwds):
+ async def _exit_wrapper(exc_type, exc, tb):
+ await callback(*args, **kwds)
+ return _exit_wrapper
+
+ async def enter_async_context(self, cm):
+ """Enters the supplied async context manager.
+
+ If successful, also pushes its __aexit__ method as a callback and
+ returns the result of the __aenter__ method.
+ """
+ _cm_type = type(cm)
+ _exit = _cm_type.__aexit__
+ result = await _cm_type.__aenter__(cm)
+ self._push_async_cm_exit(cm, _exit)
+ return result
+
+ def push_async_exit(self, exit):
+ """Registers a coroutine function with the standard __aexit__ method
+ signature.
+
+ Can suppress exceptions the same way __aexit__ method can.
+ Also accepts any object with an __aexit__ method (registering a call
+ to the method instead of the object itself).
+ """
+ _cb_type = type(exit)
+ try:
+ exit_method = _cb_type.__aexit__
+ except AttributeError:
+ # Not an async context manager, so assume it's a coroutine function
+ self._push_exit_callback(exit, False)
+ else:
+ self._push_async_cm_exit(exit, exit_method)
+ return exit # Allow use as a decorator
+
+ def push_async_callback(*args, **kwds):
+ """Registers an arbitrary coroutine function and arguments.
+
+ Cannot suppress exceptions.
+ """
+ if len(args) >= 2:
+ self, callback, *args = args
+ elif not args:
+ raise TypeError("descriptor 'push_async_callback' of "
+ "'AsyncExitStack' object needs an argument")
+ elif 'callback' in kwds:
+ callback = kwds.pop('callback')
+ self, *args = args
+ import warnings
+ warnings.warn("Passing 'callback' as keyword argument is deprecated",
+ DeprecationWarning, stacklevel=2)
+ else:
+ raise TypeError('push_async_callback expected at least 1 '
+ 'positional argument, got %d' % (len(args)-1))
+
+ _exit_wrapper = self._create_async_cb_wrapper(callback, *args, **kwds)
+
+ # We changed the signature, so using @wraps is not appropriate, but
+ # setting __wrapped__ may still help with introspection.
+ _exit_wrapper.__wrapped__ = callback
+ self._push_exit_callback(_exit_wrapper, False)
+ return callback # Allow use as a decorator
+ push_async_callback.__text_signature__ = '($self, callback, /, *args, **kwds)'
+
+ async def aclose(self):
+ """Immediately unwind the context stack."""
+ await self.__aexit__(None, None, None)
+
+ def _push_async_cm_exit(self, cm, cm_exit):
+ """Helper to correctly register coroutine function to __aexit__
+ method."""
+ _exit_wrapper = self._create_async_exit_wrapper(cm, cm_exit)
+ self._push_exit_callback(_exit_wrapper, False)
+
+ async def __aenter__(self):
+ return self
+
+ async def __aexit__(self, *exc_details):
+ received_exc = exc_details[0] is not None
+
+ # We manipulate the exception state so it behaves as though
+ # we were actually nesting multiple with statements
+ frame_exc = sys.exc_info()[1]
+ def _fix_exception_context(new_exc, old_exc):
+ # Context may not be correct, so find the end of the chain
+ while 1:
+ exc_context = new_exc.__context__
+ if exc_context is old_exc:
+ # Context is already set correctly (see issue 20317)
+ return
+ if exc_context is None or exc_context is frame_exc:
+ break
+ new_exc = exc_context
+ # Change the end of the chain to point to the exception
+ # we expect it to reference
+ new_exc.__context__ = old_exc
+
+ # Callbacks are invoked in LIFO order to match the behaviour of
+ # nested context managers
+ suppressed_exc = False
+ pending_raise = False
+ while self._exit_callbacks:
+ is_sync, cb = self._exit_callbacks.pop()
+ try:
+ if is_sync:
+ cb_suppress = cb(*exc_details)
+ else:
+ cb_suppress = await cb(*exc_details)
+
+ if cb_suppress:
+ suppressed_exc = True
+ pending_raise = False
+ exc_details = (None, None, None)
+ except:
+ new_exc_details = sys.exc_info()
+ # simulate the stack of exceptions by setting the context
+ _fix_exception_context(new_exc_details[1], exc_details[1])
+ pending_raise = True
+ exc_details = new_exc_details
+ if pending_raise:
+ try:
+ # bare "raise exc_details[1]" replaces our carefully
+ # set-up context
+ fixed_ctx = exc_details[1].__context__
+ raise exc_details[1]
+ except BaseException:
+ exc_details[1].__context__ = fixed_ctx
+ raise
+ return received_exc and suppressed_exc
+
+
+class nullcontext(AbstractContextManager):
+ """Context manager that does no additional processing.
+
+ Used as a stand-in for a normal context manager, when a particular
+ block of code is only sometimes used with a normal context manager:
+
+ cm = optional_cm if condition else nullcontext()
+ with cm:
+ # Perform operation, using optional_cm if condition is True
+ """
+
+ def __init__(self, enter_result=None):
+ self.enter_result = enter_result
+
+ def __enter__(self):
+ return self.enter_result
+
+ def __exit__(self, *excinfo):
+ pass
diff --git a/dist/lib/contextvars.py b/dist/lib/contextvars.py
new file mode 100644
index 0000000..d78c80d
--- /dev/null
+++ b/dist/lib/contextvars.py
@@ -0,0 +1,4 @@
+from _contextvars import Context, ContextVar, Token, copy_context
+
+
+__all__ = ('Context', 'ContextVar', 'Token', 'copy_context')
diff --git a/dist/lib/copy.py b/dist/lib/copy.py
new file mode 100644
index 0000000..41873f2
--- /dev/null
+++ b/dist/lib/copy.py
@@ -0,0 +1,303 @@
+"""Generic (shallow and deep) copying operations.
+
+Interface summary:
+
+ import copy
+
+ x = copy.copy(y) # make a shallow copy of y
+ x = copy.deepcopy(y) # make a deep copy of y
+
+For module specific errors, copy.Error is raised.
+
+The difference between shallow and deep copying is only relevant for
+compound objects (objects that contain other objects, like lists or
+class instances).
+
+- A shallow copy constructs a new compound object and then (to the
+ extent possible) inserts *the same objects* into it that the
+ original contains.
+
+- A deep copy constructs a new compound object and then, recursively,
+ inserts *copies* into it of the objects found in the original.
+
+Two problems often exist with deep copy operations that don't exist
+with shallow copy operations:
+
+ a) recursive objects (compound objects that, directly or indirectly,
+ contain a reference to themselves) may cause a recursive loop
+
+ b) because deep copy copies *everything* it may copy too much, e.g.
+ administrative data structures that should be shared even between
+ copies
+
+Python's deep copy operation avoids these problems by:
+
+ a) keeping a table of objects already copied during the current
+ copying pass
+
+ b) letting user-defined classes override the copying operation or the
+ set of components copied
+
+This version does not copy types like module, class, function, method,
+nor stack trace, stack frame, nor file, socket, window, nor array, nor
+any similar types.
+
+Classes can use the same interfaces to control copying that they use
+to control pickling: they can define methods called __getinitargs__(),
+__getstate__() and __setstate__(). See the documentation for module
+"pickle" for information on these methods.
+"""
+
+import types
+import weakref
+from copyreg import dispatch_table
+
+class Error(Exception):
+ pass
+error = Error # backward compatibility
+
+try:
+ from org.python.core import PyStringMap
+except ImportError:
+ PyStringMap = None
+
+__all__ = ["Error", "copy", "deepcopy"]
+
+def copy(x):
+ """Shallow copy operation on arbitrary Python objects.
+
+ See the module's __doc__ string for more info.
+ """
+
+ cls = type(x)
+
+ copier = _copy_dispatch.get(cls)
+ if copier:
+ return copier(x)
+
+ if issubclass(cls, type):
+ # treat it as a regular class:
+ return _copy_immutable(x)
+
+ copier = getattr(cls, "__copy__", None)
+ if copier is not None:
+ return copier(x)
+
+ reductor = dispatch_table.get(cls)
+ if reductor is not None:
+ rv = reductor(x)
+ else:
+ reductor = getattr(x, "__reduce_ex__", None)
+ if reductor is not None:
+ rv = reductor(4)
+ else:
+ reductor = getattr(x, "__reduce__", None)
+ if reductor:
+ rv = reductor()
+ else:
+ raise Error("un(shallow)copyable object of type %s" % cls)
+
+ if isinstance(rv, str):
+ return x
+ return _reconstruct(x, None, *rv)
+
+
+_copy_dispatch = d = {}
+
+def _copy_immutable(x):
+ return x
+for t in (type(None), int, float, bool, complex, str, tuple,
+ bytes, frozenset, type, range, slice, property,
+ types.BuiltinFunctionType, type(Ellipsis), type(NotImplemented),
+ types.FunctionType, weakref.ref):
+ d[t] = _copy_immutable
+t = getattr(types, "CodeType", None)
+if t is not None:
+ d[t] = _copy_immutable
+
+d[list] = list.copy
+d[dict] = dict.copy
+d[set] = set.copy
+d[bytearray] = bytearray.copy
+
+if PyStringMap is not None:
+ d[PyStringMap] = PyStringMap.copy
+
+del d, t
+
+def deepcopy(x, memo=None, _nil=[]):
+ """Deep copy operation on arbitrary Python objects.
+
+ See the module's __doc__ string for more info.
+ """
+
+ if memo is None:
+ memo = {}
+
+ d = id(x)
+ y = memo.get(d, _nil)
+ if y is not _nil:
+ return y
+
+ cls = type(x)
+
+ copier = _deepcopy_dispatch.get(cls)
+ if copier is not None:
+ y = copier(x, memo)
+ else:
+ if issubclass(cls, type):
+ y = _deepcopy_atomic(x, memo)
+ else:
+ copier = getattr(x, "__deepcopy__", None)
+ if copier is not None:
+ y = copier(memo)
+ else:
+ reductor = dispatch_table.get(cls)
+ if reductor:
+ rv = reductor(x)
+ else:
+ reductor = getattr(x, "__reduce_ex__", None)
+ if reductor is not None:
+ rv = reductor(4)
+ else:
+ reductor = getattr(x, "__reduce__", None)
+ if reductor:
+ rv = reductor()
+ else:
+ raise Error(
+ "un(deep)copyable object of type %s" % cls)
+ if isinstance(rv, str):
+ y = x
+ else:
+ y = _reconstruct(x, memo, *rv)
+
+ # If is its own copy, don't memoize.
+ if y is not x:
+ memo[d] = y
+ _keep_alive(x, memo) # Make sure x lives at least as long as d
+ return y
+
+_deepcopy_dispatch = d = {}
+
+def _deepcopy_atomic(x, memo):
+ return x
+d[type(None)] = _deepcopy_atomic
+d[type(Ellipsis)] = _deepcopy_atomic
+d[type(NotImplemented)] = _deepcopy_atomic
+d[int] = _deepcopy_atomic
+d[float] = _deepcopy_atomic
+d[bool] = _deepcopy_atomic
+d[complex] = _deepcopy_atomic
+d[bytes] = _deepcopy_atomic
+d[str] = _deepcopy_atomic
+d[types.CodeType] = _deepcopy_atomic
+d[type] = _deepcopy_atomic
+d[types.BuiltinFunctionType] = _deepcopy_atomic
+d[types.FunctionType] = _deepcopy_atomic
+d[weakref.ref] = _deepcopy_atomic
+d[property] = _deepcopy_atomic
+
+def _deepcopy_list(x, memo, deepcopy=deepcopy):
+ y = []
+ memo[id(x)] = y
+ append = y.append
+ for a in x:
+ append(deepcopy(a, memo))
+ return y
+d[list] = _deepcopy_list
+
+def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
+ y = [deepcopy(a, memo) for a in x]
+ # We're not going to put the tuple in the memo, but it's still important we
+ # check for it, in case the tuple contains recursive mutable structures.
+ try:
+ return memo[id(x)]
+ except KeyError:
+ pass
+ for k, j in zip(x, y):
+ if k is not j:
+ y = tuple(y)
+ break
+ else:
+ y = x
+ return y
+d[tuple] = _deepcopy_tuple
+
+def _deepcopy_dict(x, memo, deepcopy=deepcopy):
+ y = {}
+ memo[id(x)] = y
+ for key, value in x.items():
+ y[deepcopy(key, memo)] = deepcopy(value, memo)
+ return y
+d[dict] = _deepcopy_dict
+if PyStringMap is not None:
+ d[PyStringMap] = _deepcopy_dict
+
+def _deepcopy_method(x, memo): # Copy instance methods
+ return type(x)(x.__func__, deepcopy(x.__self__, memo))
+d[types.MethodType] = _deepcopy_method
+
+del d
+
+def _keep_alive(x, memo):
+ """Keeps a reference to the object x in the memo.
+
+ Because we remember objects by their id, we have
+ to assure that possibly temporary objects are kept
+ alive by referencing them.
+ We store a reference at the id of the memo, which should
+ normally not be used unless someone tries to deepcopy
+ the memo itself...
+ """
+ try:
+ memo[id(memo)].append(x)
+ except KeyError:
+ # aha, this is the first one :-)
+ memo[id(memo)]=[x]
+
+def _reconstruct(x, memo, func, args,
+ state=None, listiter=None, dictiter=None,
+ deepcopy=deepcopy):
+ deep = memo is not None
+ if deep and args:
+ args = (deepcopy(arg, memo) for arg in args)
+ y = func(*args)
+ if deep:
+ memo[id(x)] = y
+
+ if state is not None:
+ if deep:
+ state = deepcopy(state, memo)
+ if hasattr(y, '__setstate__'):
+ y.__setstate__(state)
+ else:
+ if isinstance(state, tuple) and len(state) == 2:
+ state, slotstate = state
+ else:
+ slotstate = None
+ if state is not None:
+ y.__dict__.update(state)
+ if slotstate is not None:
+ for key, value in slotstate.items():
+ setattr(y, key, value)
+
+ if listiter is not None:
+ if deep:
+ for item in listiter:
+ item = deepcopy(item, memo)
+ y.append(item)
+ else:
+ for item in listiter:
+ y.append(item)
+ if dictiter is not None:
+ if deep:
+ for key, value in dictiter:
+ key = deepcopy(key, memo)
+ value = deepcopy(value, memo)
+ y[key] = value
+ else:
+ for key, value in dictiter:
+ y[key] = value
+ return y
+
+del types, weakref, PyStringMap
diff --git a/dist/lib/copyreg.py b/dist/lib/copyreg.py
new file mode 100644
index 0000000..dfc463c
--- /dev/null
+++ b/dist/lib/copyreg.py
@@ -0,0 +1,209 @@
+"""Helper to provide extensibility for pickle.
+
+This is only useful to add pickle support for extension types defined in
+C, not for instances of user-defined classes.
+"""
+
+__all__ = ["pickle", "constructor",
+ "add_extension", "remove_extension", "clear_extension_cache"]
+
+dispatch_table = {}
+
+def pickle(ob_type, pickle_function, constructor_ob=None):
+ if not callable(pickle_function):
+ raise TypeError("reduction functions must be callable")
+ dispatch_table[ob_type] = pickle_function
+
+ # The constructor_ob function is a vestige of safe for unpickling.
+ # There is no reason for the caller to pass it anymore.
+ if constructor_ob is not None:
+ constructor(constructor_ob)
+
+def constructor(object):
+ if not callable(object):
+ raise TypeError("constructors must be callable")
+
+# Example: provide pickling support for complex numbers.
+
+try:
+ complex
+except NameError:
+ pass
+else:
+
+ def pickle_complex(c):
+ return complex, (c.real, c.imag)
+
+ pickle(complex, pickle_complex, complex)
+
+# Support for pickling new-style objects
+
+def _reconstructor(cls, base, state):
+ if base is object:
+ obj = object.__new__(cls)
+ else:
+ obj = base.__new__(cls, state)
+ if base.__init__ != object.__init__:
+ base.__init__(obj, state)
+ return obj
+
+_HEAPTYPE = 1<<9
+
+# Python code for object.__reduce_ex__ for protocols 0 and 1
+
+def _reduce_ex(self, proto):
+ assert proto < 2
+ cls = self.__class__
+ for base in cls.__mro__:
+ if hasattr(base, '__flags__') and not base.__flags__ & _HEAPTYPE:
+ break
+ else:
+ base = object # not really reachable
+ if base is object:
+ state = None
+ else:
+ if base is cls:
+ raise TypeError(f"cannot pickle {cls.__name__!r} object")
+ state = base(self)
+ args = (cls, base, state)
+ try:
+ getstate = self.__getstate__
+ except AttributeError:
+ if getattr(self, "__slots__", None):
+ raise TypeError(f"cannot pickle {cls.__name__!r} object: "
+ f"a class that defines __slots__ without "
+ f"defining __getstate__ cannot be pickled "
+ f"with protocol {proto}") from None
+ try:
+ dict = self.__dict__
+ except AttributeError:
+ dict = None
+ else:
+ dict = getstate()
+ if dict:
+ return _reconstructor, args, dict
+ else:
+ return _reconstructor, args
+
+# Helper for __reduce_ex__ protocol 2
+
+def __newobj__(cls, *args):
+ return cls.__new__(cls, *args)
+
+def __newobj_ex__(cls, args, kwargs):
+ """Used by pickle protocol 4, instead of __newobj__ to allow classes with
+ keyword-only arguments to be pickled correctly.
+ """
+ return cls.__new__(cls, *args, **kwargs)
+
+def _slotnames(cls):
+ """Return a list of slot names for a given class.
+
+ This needs to find slots defined by the class and its bases, so we
+ can't simply return the __slots__ attribute. We must walk down
+ the Method Resolution Order and concatenate the __slots__ of each
+ class found there. (This assumes classes don't modify their
+ __slots__ attribute to misrepresent their slots after the class is
+ defined.)
+ """
+
+ # Get the value from a cache in the class if possible
+ names = cls.__dict__.get("__slotnames__")
+ if names is not None:
+ return names
+
+ # Not cached -- calculate the value
+ names = []
+ if not hasattr(cls, "__slots__"):
+ # This class has no slots
+ pass
+ else:
+ # Slots found -- gather slot names from all base classes
+ for c in cls.__mro__:
+ if "__slots__" in c.__dict__:
+ slots = c.__dict__['__slots__']
+ # if class has a single slot, it can be given as a string
+ if isinstance(slots, str):
+ slots = (slots,)
+ for name in slots:
+ # special descriptors
+ if name in ("__dict__", "__weakref__"):
+ continue
+ # mangled names
+ elif name.startswith('__') and not name.endswith('__'):
+ stripped = c.__name__.lstrip('_')
+ if stripped:
+ names.append('_%s%s' % (stripped, name))
+ else:
+ names.append(name)
+ else:
+ names.append(name)
+
+ # Cache the outcome in the class if at all possible
+ try:
+ cls.__slotnames__ = names
+ except:
+ pass # But don't die if we can't
+
+ return names
+
+# A registry of extension codes. This is an ad-hoc compression
+# mechanism. Whenever a global reference to , is about
+# to be pickled, the (, ) tuple is looked up here to see
+# if it is a registered extension code for it. Extension codes are
+# universal, so that the meaning of a pickle does not depend on
+# context. (There are also some codes reserved for local use that
+# don't have this restriction.) Codes are positive ints; 0 is
+# reserved.
+
+_extension_registry = {} # key -> code
+_inverted_registry = {} # code -> key
+_extension_cache = {} # code -> object
+# Don't ever rebind those names: pickling grabs a reference to them when
+# it's initialized, and won't see a rebinding.
+
+def add_extension(module, name, code):
+ """Register an extension code."""
+ code = int(code)
+ if not 1 <= code <= 0x7fffffff:
+ raise ValueError("code out of range")
+ key = (module, name)
+ if (_extension_registry.get(key) == code and
+ _inverted_registry.get(code) == key):
+ return # Redundant registrations are benign
+ if key in _extension_registry:
+ raise ValueError("key %s is already registered with code %s" %
+ (key, _extension_registry[key]))
+ if code in _inverted_registry:
+ raise ValueError("code %s is already in use for key %s" %
+ (code, _inverted_registry[code]))
+ _extension_registry[key] = code
+ _inverted_registry[code] = key
+
+def remove_extension(module, name, code):
+ """Unregister an extension code. For testing only."""
+ key = (module, name)
+ if (_extension_registry.get(key) != code or
+ _inverted_registry.get(code) != key):
+ raise ValueError("key %s is not registered with code %s" %
+ (key, code))
+ del _extension_registry[key]
+ del _inverted_registry[code]
+ if code in _extension_cache:
+ del _extension_cache[code]
+
+def clear_extension_cache():
+ _extension_cache.clear()
+
+# Standard extension code assignments
+
+# Reserved ranges
+
+# First Last Count Purpose
+# 1 127 127 Reserved for Python standard library
+# 128 191 64 Reserved for Zope
+# 192 239 48 Reserved for 3rd parties
+# 240 255 16 Reserved for private use (will never be assigned)
+# 256 Inf Inf Reserved for future assignment
+
+# Extension codes are assigned by the Python Software Foundation.
diff --git a/dist/lib/crypt.py b/dist/lib/crypt.py
new file mode 100644
index 0000000..8846602
--- /dev/null
+++ b/dist/lib/crypt.py
@@ -0,0 +1,112 @@
+"""Wrapper to the POSIX crypt library call and associated functionality."""
+
+import sys as _sys
+
+try:
+ import _crypt
+except ModuleNotFoundError:
+ if _sys.platform == 'win32':
+ raise ImportError("The crypt module is not supported on Windows")
+ else:
+ raise ImportError("The required _crypt module was not built as part of CPython")
+
+import string as _string
+from random import SystemRandom as _SystemRandom
+from collections import namedtuple as _namedtuple
+
+
+_saltchars = _string.ascii_letters + _string.digits + './'
+_sr = _SystemRandom()
+
+
+class _Method(_namedtuple('_Method', 'name ident salt_chars total_size')):
+
+ """Class representing a salt method per the Modular Crypt Format or the
+ legacy 2-character crypt method."""
+
+ def __repr__(self):
+ return ''.format(self.name)
+
+
+def mksalt(method=None, *, rounds=None):
+ """Generate a salt for the specified method.
+
+ If not specified, the strongest available method will be used.
+
+ """
+ if method is None:
+ method = methods[0]
+ if rounds is not None and not isinstance(rounds, int):
+ raise TypeError(f'{rounds.__class__.__name__} object cannot be '
+ f'interpreted as an integer')
+ if not method.ident: # traditional
+ s = ''
+ else: # modular
+ s = f'${method.ident}$'
+
+ if method.ident and method.ident[0] == '2': # Blowfish variants
+ if rounds is None:
+ log_rounds = 12
+ else:
+ log_rounds = int.bit_length(rounds-1)
+ if rounds != 1 << log_rounds:
+ raise ValueError('rounds must be a power of 2')
+ if not 4 <= log_rounds <= 31:
+ raise ValueError('rounds out of the range 2**4 to 2**31')
+ s += f'{log_rounds:02d}$'
+ elif method.ident in ('5', '6'): # SHA-2
+ if rounds is not None:
+ if not 1000 <= rounds <= 999_999_999:
+ raise ValueError('rounds out of the range 1000 to 999_999_999')
+ s += f'rounds={rounds}$'
+ elif rounds is not None:
+ raise ValueError(f"{method} doesn't support the rounds argument")
+
+ s += ''.join(_sr.choice(_saltchars) for char in range(method.salt_chars))
+ return s
+
+
+def crypt(word, salt=None):
+ """Return a string representing the one-way hash of a password, with a salt
+ prepended.
+
+ If ``salt`` is not specified or is ``None``, the strongest
+ available method will be selected and a salt generated. Otherwise,
+ ``salt`` may be one of the ``crypt.METHOD_*`` values, or a string as
+ returned by ``crypt.mksalt()``.
+
+ """
+ if salt is None or isinstance(salt, _Method):
+ salt = mksalt(salt)
+ return _crypt.crypt(word, salt)
+
+
+# available salting/crypto methods
+methods = []
+
+def _add_method(name, *args, rounds=None):
+ method = _Method(name, *args)
+ globals()['METHOD_' + name] = method
+ salt = mksalt(method, rounds=rounds)
+ result = crypt('', salt)
+ if result and len(result) == method.total_size:
+ methods.append(method)
+ return True
+ return False
+
+_add_method('SHA512', '6', 16, 106)
+_add_method('SHA256', '5', 16, 63)
+
+# Choose the strongest supported version of Blowfish hashing.
+# Early versions have flaws. Version 'a' fixes flaws of
+# the initial implementation, 'b' fixes flaws of 'a'.
+# 'y' is the same as 'b', for compatibility
+# with openwall crypt_blowfish.
+for _v in 'b', 'y', 'a', '':
+ if _add_method('BLOWFISH', '2' + _v, 22, 59 + len(_v), rounds=1<<4):
+ break
+
+_add_method('MD5', '1', 8, 34)
+_add_method('CRYPT', None, 2, 13)
+
+del _v, _add_method
diff --git a/dist/lib/csv.py b/dist/lib/csv.py
new file mode 100644
index 0000000..dc85077
--- /dev/null
+++ b/dist/lib/csv.py
@@ -0,0 +1,448 @@
+
+"""
+csv.py - read/write/investigate CSV files
+"""
+
+import re
+from _csv import Error, __version__, writer, reader, register_dialect, \
+ unregister_dialect, get_dialect, list_dialects, \
+ field_size_limit, \
+ QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, \
+ __doc__
+from _csv import Dialect as _Dialect
+
+from io import StringIO
+
+__all__ = ["QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE",
+ "Error", "Dialect", "__doc__", "excel", "excel_tab",
+ "field_size_limit", "reader", "writer",
+ "register_dialect", "get_dialect", "list_dialects", "Sniffer",
+ "unregister_dialect", "__version__", "DictReader", "DictWriter",
+ "unix_dialect"]
+
+class Dialect:
+ """Describe a CSV dialect.
+
+ This must be subclassed (see csv.excel). Valid attributes are:
+ delimiter, quotechar, escapechar, doublequote, skipinitialspace,
+ lineterminator, quoting.
+
+ """
+ _name = ""
+ _valid = False
+ # placeholders
+ delimiter = None
+ quotechar = None
+ escapechar = None
+ doublequote = None
+ skipinitialspace = None
+ lineterminator = None
+ quoting = None
+
+ def __init__(self):
+ if self.__class__ != Dialect:
+ self._valid = True
+ self._validate()
+
+ def _validate(self):
+ try:
+ _Dialect(self)
+ except TypeError as e:
+ # We do this for compatibility with py2.3
+ raise Error(str(e))
+
+class excel(Dialect):
+ """Describe the usual properties of Excel-generated CSV files."""
+ delimiter = ','
+ quotechar = '"'
+ doublequote = True
+ skipinitialspace = False
+ lineterminator = '\r\n'
+ quoting = QUOTE_MINIMAL
+register_dialect("excel", excel)
+
+class excel_tab(excel):
+ """Describe the usual properties of Excel-generated TAB-delimited files."""
+ delimiter = '\t'
+register_dialect("excel-tab", excel_tab)
+
+class unix_dialect(Dialect):
+ """Describe the usual properties of Unix-generated CSV files."""
+ delimiter = ','
+ quotechar = '"'
+ doublequote = True
+ skipinitialspace = False
+ lineterminator = '\n'
+ quoting = QUOTE_ALL
+register_dialect("unix", unix_dialect)
+
+
+class DictReader:
+ def __init__(self, f, fieldnames=None, restkey=None, restval=None,
+ dialect="excel", *args, **kwds):
+ self._fieldnames = fieldnames # list of keys for the dict
+ self.restkey = restkey # key to catch long rows
+ self.restval = restval # default value for short rows
+ self.reader = reader(f, dialect, *args, **kwds)
+ self.dialect = dialect
+ self.line_num = 0
+
+ def __iter__(self):
+ return self
+
+ @property
+ def fieldnames(self):
+ if self._fieldnames is None:
+ try:
+ self._fieldnames = next(self.reader)
+ except StopIteration:
+ pass
+ self.line_num = self.reader.line_num
+ return self._fieldnames
+
+ @fieldnames.setter
+ def fieldnames(self, value):
+ self._fieldnames = value
+
+ def __next__(self):
+ if self.line_num == 0:
+ # Used only for its side effect.
+ self.fieldnames
+ row = next(self.reader)
+ self.line_num = self.reader.line_num
+
+ # unlike the basic reader, we prefer not to return blanks,
+ # because we will typically wind up with a dict full of None
+ # values
+ while row == []:
+ row = next(self.reader)
+ d = dict(zip(self.fieldnames, row))
+ lf = len(self.fieldnames)
+ lr = len(row)
+ if lf < lr:
+ d[self.restkey] = row[lf:]
+ elif lf > lr:
+ for key in self.fieldnames[lr:]:
+ d[key] = self.restval
+ return d
+
+
+class DictWriter:
+ def __init__(self, f, fieldnames, restval="", extrasaction="raise",
+ dialect="excel", *args, **kwds):
+ self.fieldnames = fieldnames # list of keys for the dict
+ self.restval = restval # for writing short dicts
+ if extrasaction.lower() not in ("raise", "ignore"):
+ raise ValueError("extrasaction (%s) must be 'raise' or 'ignore'"
+ % extrasaction)
+ self.extrasaction = extrasaction
+ self.writer = writer(f, dialect, *args, **kwds)
+
+ def writeheader(self):
+ header = dict(zip(self.fieldnames, self.fieldnames))
+ return self.writerow(header)
+
+ def _dict_to_list(self, rowdict):
+ if self.extrasaction == "raise":
+ wrong_fields = rowdict.keys() - self.fieldnames
+ if wrong_fields:
+ raise ValueError("dict contains fields not in fieldnames: "
+ + ", ".join([repr(x) for x in wrong_fields]))
+ return (rowdict.get(key, self.restval) for key in self.fieldnames)
+
+ def writerow(self, rowdict):
+ return self.writer.writerow(self._dict_to_list(rowdict))
+
+ def writerows(self, rowdicts):
+ return self.writer.writerows(map(self._dict_to_list, rowdicts))
+
+# Guard Sniffer's type checking against builds that exclude complex()
+try:
+ complex
+except NameError:
+ complex = float
+
+class Sniffer:
+ '''
+ "Sniffs" the format of a CSV file (i.e. delimiter, quotechar)
+ Returns a Dialect object.
+ '''
+ def __init__(self):
+ # in case there is more than one possible delimiter
+ self.preferred = [',', '\t', ';', ' ', ':']
+
+
+ def sniff(self, sample, delimiters=None):
+ """
+ Returns a dialect (or None) corresponding to the sample
+ """
+
+ quotechar, doublequote, delimiter, skipinitialspace = \
+ self._guess_quote_and_delimiter(sample, delimiters)
+ if not delimiter:
+ delimiter, skipinitialspace = self._guess_delimiter(sample,
+ delimiters)
+
+ if not delimiter:
+ raise Error("Could not determine delimiter")
+
+ class dialect(Dialect):
+ _name = "sniffed"
+ lineterminator = '\r\n'
+ quoting = QUOTE_MINIMAL
+ # escapechar = ''
+
+ dialect.doublequote = doublequote
+ dialect.delimiter = delimiter
+ # _csv.reader won't accept a quotechar of ''
+ dialect.quotechar = quotechar or '"'
+ dialect.skipinitialspace = skipinitialspace
+
+ return dialect
+
+
+ def _guess_quote_and_delimiter(self, data, delimiters):
+ """
+ Looks for text enclosed between two identical quotes
+ (the probable quotechar) which are preceded and followed
+ by the same character (the probable delimiter).
+ For example:
+ ,'some text',
+ The quote with the most wins, same with the delimiter.
+ If there is no quotechar the delimiter can't be determined
+ this way.
+ """
+
+ matches = []
+ for restr in (r'(?P[^\w\n"\'])(?P ?)(?P["\']).*?(?P=quote)(?P=delim)', # ,".*?",
+ r'(?:^|\n)(?P["\']).*?(?P=quote)(?P[^\w\n"\'])(?P ?)', # ".*?",
+ r'(?P[^\w\n"\'])(?P ?)(?P["\']).*?(?P=quote)(?:$|\n)', # ,".*?"
+ r'(?:^|\n)(?P["\']).*?(?P=quote)(?:$|\n)'): # ".*?" (no delim, no space)
+ regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
+ matches = regexp.findall(data)
+ if matches:
+ break
+
+ if not matches:
+ # (quotechar, doublequote, delimiter, skipinitialspace)
+ return ('', False, None, 0)
+ quotes = {}
+ delims = {}
+ spaces = 0
+ groupindex = regexp.groupindex
+ for m in matches:
+ n = groupindex['quote'] - 1
+ key = m[n]
+ if key:
+ quotes[key] = quotes.get(key, 0) + 1
+ try:
+ n = groupindex['delim'] - 1
+ key = m[n]
+ except KeyError:
+ continue
+ if key and (delimiters is None or key in delimiters):
+ delims[key] = delims.get(key, 0) + 1
+ try:
+ n = groupindex['space'] - 1
+ except KeyError:
+ continue
+ if m[n]:
+ spaces += 1
+
+ quotechar = max(quotes, key=quotes.get)
+
+ if delims:
+ delim = max(delims, key=delims.get)
+ skipinitialspace = delims[delim] == spaces
+ if delim == '\n': # most likely a file with a single column
+ delim = ''
+ else:
+ # there is *no* delimiter, it's a single column of quoted data
+ delim = ''
+ skipinitialspace = 0
+
+ # if we see an extra quote between delimiters, we've got a
+ # double quoted format
+ dq_regexp = re.compile(
+ r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
+ {'delim':re.escape(delim), 'quote':quotechar}, re.MULTILINE)
+
+
+
+ if dq_regexp.search(data):
+ doublequote = True
+ else:
+ doublequote = False
+
+ return (quotechar, doublequote, delim, skipinitialspace)
+
+
+ def _guess_delimiter(self, data, delimiters):
+ """
+ The delimiter /should/ occur the same number of times on
+ each row. However, due to malformed data, it may not. We don't want
+ an all or nothing approach, so we allow for small variations in this
+ number.
+ 1) build a table of the frequency of each character on every line.
+ 2) build a table of frequencies of this frequency (meta-frequency?),
+ e.g. 'x occurred 5 times in 10 rows, 6 times in 1000 rows,
+ 7 times in 2 rows'
+ 3) use the mode of the meta-frequency to determine the /expected/
+ frequency for that character
+ 4) find out how often the character actually meets that goal
+ 5) the character that best meets its goal is the delimiter
+ For performance reasons, the data is evaluated in chunks, so it can
+ try and evaluate the smallest portion of the data possible, evaluating
+ additional chunks as necessary.
+ """
+
+ data = list(filter(None, data.split('\n')))
+
+ ascii = [chr(c) for c in range(127)] # 7-bit ASCII
+
+ # build frequency tables
+ chunkLength = min(10, len(data))
+ iteration = 0
+ charFrequency = {}
+ modes = {}
+ delims = {}
+ start, end = 0, chunkLength
+ while start < len(data):
+ iteration += 1
+ for line in data[start:end]:
+ for char in ascii:
+ metaFrequency = charFrequency.get(char, {})
+ # must count even if frequency is 0
+ freq = line.count(char)
+ # value is the mode
+ metaFrequency[freq] = metaFrequency.get(freq, 0) + 1
+ charFrequency[char] = metaFrequency
+
+ for char in charFrequency.keys():
+ items = list(charFrequency[char].items())
+ if len(items) == 1 and items[0][0] == 0:
+ continue
+ # get the mode of the frequencies
+ if len(items) > 1:
+ modes[char] = max(items, key=lambda x: x[1])
+ # adjust the mode - subtract the sum of all
+ # other frequencies
+ items.remove(modes[char])
+ modes[char] = (modes[char][0], modes[char][1]
+ - sum(item[1] for item in items))
+ else:
+ modes[char] = items[0]
+
+ # build a list of possible delimiters
+ modeList = modes.items()
+ total = float(min(chunkLength * iteration, len(data)))
+ # (rows of consistent data) / (number of rows) = 100%
+ consistency = 1.0
+ # minimum consistency threshold
+ threshold = 0.9
+ while len(delims) == 0 and consistency >= threshold:
+ for k, v in modeList:
+ if v[0] > 0 and v[1] > 0:
+ if ((v[1]/total) >= consistency and
+ (delimiters is None or k in delimiters)):
+ delims[k] = v
+ consistency -= 0.01
+
+ if len(delims) == 1:
+ delim = list(delims.keys())[0]
+ skipinitialspace = (data[0].count(delim) ==
+ data[0].count("%c " % delim))
+ return (delim, skipinitialspace)
+
+ # analyze another chunkLength lines
+ start = end
+ end += chunkLength
+
+ if not delims:
+ return ('', 0)
+
+ # if there's more than one, fall back to a 'preferred' list
+ if len(delims) > 1:
+ for d in self.preferred:
+ if d in delims.keys():
+ skipinitialspace = (data[0].count(d) ==
+ data[0].count("%c " % d))
+ return (d, skipinitialspace)
+
+ # nothing else indicates a preference, pick the character that
+ # dominates(?)
+ items = [(v,k) for (k,v) in delims.items()]
+ items.sort()
+ delim = items[-1][1]
+
+ skipinitialspace = (data[0].count(delim) ==
+ data[0].count("%c " % delim))
+ return (delim, skipinitialspace)
+
+
+ def has_header(self, sample):
+ # Creates a dictionary of types of data in each column. If any
+ # column is of a single type (say, integers), *except* for the first
+ # row, then the first row is presumed to be labels. If the type
+ # can't be determined, it is assumed to be a string in which case
+ # the length of the string is the determining factor: if all of the
+ # rows except for the first are the same length, it's a header.
+ # Finally, a 'vote' is taken at the end for each column, adding or
+ # subtracting from the likelihood of the first row being a header.
+
+ rdr = reader(StringIO(sample), self.sniff(sample))
+
+ header = next(rdr) # assume first row is header
+
+ columns = len(header)
+ columnTypes = {}
+ for i in range(columns): columnTypes[i] = None
+
+ checked = 0
+ for row in rdr:
+ # arbitrary number of rows to check, to keep it sane
+ if checked > 20:
+ break
+ checked += 1
+
+ if len(row) != columns:
+ continue # skip rows that have irregular number of columns
+
+ for col in list(columnTypes.keys()):
+
+ for thisType in [int, float, complex]:
+ try:
+ thisType(row[col])
+ break
+ except (ValueError, OverflowError):
+ pass
+ else:
+ # fallback to length of string
+ thisType = len(row[col])
+
+ if thisType != columnTypes[col]:
+ if columnTypes[col] is None: # add new column type
+ columnTypes[col] = thisType
+ else:
+ # type is inconsistent, remove column from
+ # consideration
+ del columnTypes[col]
+
+ # finally, compare results against first row and "vote"
+ # on whether it's a header
+ hasHeader = 0
+ for col, colType in columnTypes.items():
+ if type(colType) == type(0): # it's a length
+ if len(header[col]) != colType:
+ hasHeader += 1
+ else:
+ hasHeader -= 1
+ else: # attempt typecast
+ try:
+ colType(header[col])
+ except (ValueError, TypeError):
+ hasHeader += 1
+ else:
+ hasHeader -= 1
+
+ return hasHeader > 0
diff --git a/dist/lib/ctypes/__init__.py b/dist/lib/ctypes/__init__.py
new file mode 100644
index 0000000..8f09911
--- /dev/null
+++ b/dist/lib/ctypes/__init__.py
@@ -0,0 +1,560 @@
+"""create and manipulate C data types in Python"""
+
+import os as _os, sys as _sys
+
+__version__ = "1.1.0"
+
+from _ctypes import Union, Structure, Array
+from _ctypes import _Pointer
+from _ctypes import CFuncPtr as _CFuncPtr
+from _ctypes import __version__ as _ctypes_version
+from _ctypes import RTLD_LOCAL, RTLD_GLOBAL
+from _ctypes import ArgumentError
+
+from struct import calcsize as _calcsize
+
+if __version__ != _ctypes_version:
+ raise Exception("Version number mismatch", __version__, _ctypes_version)
+
+if _os.name == "nt":
+ from _ctypes import FormatError
+
+DEFAULT_MODE = RTLD_LOCAL
+if _os.name == "posix" and _sys.platform == "darwin":
+ # On OS X 10.3, we use RTLD_GLOBAL as default mode
+ # because RTLD_LOCAL does not work at least on some
+ # libraries. OS X 10.3 is Darwin 7, so we check for
+ # that.
+
+ if int(_os.uname().release.split('.')[0]) < 8:
+ DEFAULT_MODE = RTLD_GLOBAL
+
+from _ctypes import FUNCFLAG_CDECL as _FUNCFLAG_CDECL, \
+ FUNCFLAG_PYTHONAPI as _FUNCFLAG_PYTHONAPI, \
+ FUNCFLAG_USE_ERRNO as _FUNCFLAG_USE_ERRNO, \
+ FUNCFLAG_USE_LASTERROR as _FUNCFLAG_USE_LASTERROR
+
+# WINOLEAPI -> HRESULT
+# WINOLEAPI_(type)
+#
+# STDMETHODCALLTYPE
+#
+# STDMETHOD(name)
+# STDMETHOD_(type, name)
+#
+# STDAPICALLTYPE
+
+def create_string_buffer(init, size=None):
+ """create_string_buffer(aBytes) -> character array
+ create_string_buffer(anInteger) -> character array
+ create_string_buffer(aBytes, anInteger) -> character array
+ """
+ if isinstance(init, bytes):
+ if size is None:
+ size = len(init)+1
+ _sys.audit("ctypes.create_string_buffer", init, size)
+ buftype = c_char * size
+ buf = buftype()
+ buf.value = init
+ return buf
+ elif isinstance(init, int):
+ _sys.audit("ctypes.create_string_buffer", None, init)
+ buftype = c_char * init
+ buf = buftype()
+ return buf
+ raise TypeError(init)
+
+def c_buffer(init, size=None):
+## "deprecated, use create_string_buffer instead"
+## import warnings
+## warnings.warn("c_buffer is deprecated, use create_string_buffer instead",
+## DeprecationWarning, stacklevel=2)
+ return create_string_buffer(init, size)
+
+_c_functype_cache = {}
+def CFUNCTYPE(restype, *argtypes, **kw):
+ """CFUNCTYPE(restype, *argtypes,
+ use_errno=False, use_last_error=False) -> function prototype.
+
+ restype: the result type
+ argtypes: a sequence specifying the argument types
+
+ The function prototype can be called in different ways to create a
+ callable object:
+
+ prototype(integer address) -> foreign function
+ prototype(callable) -> create and return a C callable function from callable
+ prototype(integer index, method name[, paramflags]) -> foreign function calling a COM method
+ prototype((ordinal number, dll object)[, paramflags]) -> foreign function exported by ordinal
+ prototype((function name, dll object)[, paramflags]) -> foreign function exported by name
+ """
+ flags = _FUNCFLAG_CDECL
+ if kw.pop("use_errno", False):
+ flags |= _FUNCFLAG_USE_ERRNO
+ if kw.pop("use_last_error", False):
+ flags |= _FUNCFLAG_USE_LASTERROR
+ if kw:
+ raise ValueError("unexpected keyword argument(s) %s" % kw.keys())
+ try:
+ return _c_functype_cache[(restype, argtypes, flags)]
+ except KeyError:
+ class CFunctionType(_CFuncPtr):
+ _argtypes_ = argtypes
+ _restype_ = restype
+ _flags_ = flags
+ _c_functype_cache[(restype, argtypes, flags)] = CFunctionType
+ return CFunctionType
+
+if _os.name == "nt":
+ from _ctypes import LoadLibrary as _dlopen
+ from _ctypes import FUNCFLAG_STDCALL as _FUNCFLAG_STDCALL
+
+ _win_functype_cache = {}
+ def WINFUNCTYPE(restype, *argtypes, **kw):
+ # docstring set later (very similar to CFUNCTYPE.__doc__)
+ flags = _FUNCFLAG_STDCALL
+ if kw.pop("use_errno", False):
+ flags |= _FUNCFLAG_USE_ERRNO
+ if kw.pop("use_last_error", False):
+ flags |= _FUNCFLAG_USE_LASTERROR
+ if kw:
+ raise ValueError("unexpected keyword argument(s) %s" % kw.keys())
+ try:
+ return _win_functype_cache[(restype, argtypes, flags)]
+ except KeyError:
+ class WinFunctionType(_CFuncPtr):
+ _argtypes_ = argtypes
+ _restype_ = restype
+ _flags_ = flags
+ _win_functype_cache[(restype, argtypes, flags)] = WinFunctionType
+ return WinFunctionType
+ if WINFUNCTYPE.__doc__:
+ WINFUNCTYPE.__doc__ = CFUNCTYPE.__doc__.replace("CFUNCTYPE", "WINFUNCTYPE")
+
+elif _os.name == "posix":
+ from _ctypes import dlopen as _dlopen
+
+from _ctypes import sizeof, byref, addressof, alignment, resize
+from _ctypes import get_errno, set_errno
+from _ctypes import _SimpleCData
+
+def _check_size(typ, typecode=None):
+ # Check if sizeof(ctypes_type) against struct.calcsize. This
+ # should protect somewhat against a misconfigured libffi.
+ from struct import calcsize
+ if typecode is None:
+ # Most _type_ codes are the same as used in struct
+ typecode = typ._type_
+ actual, required = sizeof(typ), calcsize(typecode)
+ if actual != required:
+ raise SystemError("sizeof(%s) wrong: %d instead of %d" % \
+ (typ, actual, required))
+
+class py_object(_SimpleCData):
+ _type_ = "O"
+ def __repr__(self):
+ try:
+ return super().__repr__()
+ except ValueError:
+ return "%s()" % type(self).__name__
+_check_size(py_object, "P")
+
+class c_short(_SimpleCData):
+ _type_ = "h"
+_check_size(c_short)
+
+class c_ushort(_SimpleCData):
+ _type_ = "H"
+_check_size(c_ushort)
+
+class c_long(_SimpleCData):
+ _type_ = "l"
+_check_size(c_long)
+
+class c_ulong(_SimpleCData):
+ _type_ = "L"
+_check_size(c_ulong)
+
+if _calcsize("i") == _calcsize("l"):
+ # if int and long have the same size, make c_int an alias for c_long
+ c_int = c_long
+ c_uint = c_ulong
+else:
+ class c_int(_SimpleCData):
+ _type_ = "i"
+ _check_size(c_int)
+
+ class c_uint(_SimpleCData):
+ _type_ = "I"
+ _check_size(c_uint)
+
+class c_float(_SimpleCData):
+ _type_ = "f"
+_check_size(c_float)
+
+class c_double(_SimpleCData):
+ _type_ = "d"
+_check_size(c_double)
+
+class c_longdouble(_SimpleCData):
+ _type_ = "g"
+if sizeof(c_longdouble) == sizeof(c_double):
+ c_longdouble = c_double
+
+if _calcsize("l") == _calcsize("q"):
+ # if long and long long have the same size, make c_longlong an alias for c_long
+ c_longlong = c_long
+ c_ulonglong = c_ulong
+else:
+ class c_longlong(_SimpleCData):
+ _type_ = "q"
+ _check_size(c_longlong)
+
+ class c_ulonglong(_SimpleCData):
+ _type_ = "Q"
+ ## def from_param(cls, val):
+ ## return ('d', float(val), val)
+ ## from_param = classmethod(from_param)
+ _check_size(c_ulonglong)
+
+class c_ubyte(_SimpleCData):
+ _type_ = "B"
+c_ubyte.__ctype_le__ = c_ubyte.__ctype_be__ = c_ubyte
+# backward compatibility:
+##c_uchar = c_ubyte
+_check_size(c_ubyte)
+
+class c_byte(_SimpleCData):
+ _type_ = "b"
+c_byte.__ctype_le__ = c_byte.__ctype_be__ = c_byte
+_check_size(c_byte)
+
+class c_char(_SimpleCData):
+ _type_ = "c"
+c_char.__ctype_le__ = c_char.__ctype_be__ = c_char
+_check_size(c_char)
+
+class c_char_p(_SimpleCData):
+ _type_ = "z"
+ def __repr__(self):
+ return "%s(%s)" % (self.__class__.__name__, c_void_p.from_buffer(self).value)
+_check_size(c_char_p, "P")
+
+class c_void_p(_SimpleCData):
+ _type_ = "P"
+c_voidp = c_void_p # backwards compatibility (to a bug)
+_check_size(c_void_p)
+
+class c_bool(_SimpleCData):
+ _type_ = "?"
+
+from _ctypes import POINTER, pointer, _pointer_type_cache
+
+class c_wchar_p(_SimpleCData):
+ _type_ = "Z"
+ def __repr__(self):
+ return "%s(%s)" % (self.__class__.__name__, c_void_p.from_buffer(self).value)
+
+class c_wchar(_SimpleCData):
+ _type_ = "u"
+
+def _reset_cache():
+ _pointer_type_cache.clear()
+ _c_functype_cache.clear()
+ if _os.name == "nt":
+ _win_functype_cache.clear()
+ # _SimpleCData.c_wchar_p_from_param
+ POINTER(c_wchar).from_param = c_wchar_p.from_param
+ # _SimpleCData.c_char_p_from_param
+ POINTER(c_char).from_param = c_char_p.from_param
+ _pointer_type_cache[None] = c_void_p
+
+def create_unicode_buffer(init, size=None):
+ """create_unicode_buffer(aString) -> character array
+ create_unicode_buffer(anInteger) -> character array
+ create_unicode_buffer(aString, anInteger) -> character array
+ """
+ if isinstance(init, str):
+ if size is None:
+ if sizeof(c_wchar) == 2:
+ # UTF-16 requires a surrogate pair (2 wchar_t) for non-BMP
+ # characters (outside [U+0000; U+FFFF] range). +1 for trailing
+ # NUL character.
+ size = sum(2 if ord(c) > 0xFFFF else 1 for c in init) + 1
+ else:
+ # 32-bit wchar_t (1 wchar_t per Unicode character). +1 for
+ # trailing NUL character.
+ size = len(init) + 1
+ _sys.audit("ctypes.create_unicode_buffer", init, size)
+ buftype = c_wchar * size
+ buf = buftype()
+ buf.value = init
+ return buf
+ elif isinstance(init, int):
+ _sys.audit("ctypes.create_unicode_buffer", None, init)
+ buftype = c_wchar * init
+ buf = buftype()
+ return buf
+ raise TypeError(init)
+
+
+# XXX Deprecated
+def SetPointerType(pointer, cls):
+ if _pointer_type_cache.get(cls, None) is not None:
+ raise RuntimeError("This type already exists in the cache")
+ if id(pointer) not in _pointer_type_cache:
+ raise RuntimeError("What's this???")
+ pointer.set_type(cls)
+ _pointer_type_cache[cls] = pointer
+ del _pointer_type_cache[id(pointer)]
+
+# XXX Deprecated
+def ARRAY(typ, len):
+ return typ * len
+
+################################################################
+
+
+class CDLL(object):
+ """An instance of this class represents a loaded dll/shared
+ library, exporting functions using the standard C calling
+ convention (named 'cdecl' on Windows).
+
+ The exported functions can be accessed as attributes, or by
+ indexing with the function name. Examples:
+
+ .qsort -> callable object
+ ['qsort'] -> callable object
+
+ Calling the functions releases the Python GIL during the call and
+ reacquires it afterwards.
+ """
+ _func_flags_ = _FUNCFLAG_CDECL
+ _func_restype_ = c_int
+ # default values for repr
+ _name = ''
+ _handle = 0
+ _FuncPtr = None
+
+ def __init__(self, name, mode=DEFAULT_MODE, handle=None,
+ use_errno=False,
+ use_last_error=False,
+ winmode=None):
+ self._name = name
+ flags = self._func_flags_
+ if use_errno:
+ flags |= _FUNCFLAG_USE_ERRNO
+ if use_last_error:
+ flags |= _FUNCFLAG_USE_LASTERROR
+ if _sys.platform.startswith("aix"):
+ """When the name contains ".a(" and ends with ")",
+ e.g., "libFOO.a(libFOO.so)" - this is taken to be an
+ archive(member) syntax for dlopen(), and the mode is adjusted.
+ Otherwise, name is presented to dlopen() as a file argument.
+ """
+ if name and name.endswith(")") and ".a(" in name:
+ mode |= ( _os.RTLD_MEMBER | _os.RTLD_NOW )
+ if _os.name == "nt":
+ if winmode is not None:
+ mode = winmode
+ else:
+ import nt
+ mode = nt._LOAD_LIBRARY_SEARCH_DEFAULT_DIRS
+ if '/' in name or '\\' in name:
+ self._name = nt._getfullpathname(self._name)
+ mode |= nt._LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR
+
+ class _FuncPtr(_CFuncPtr):
+ _flags_ = flags
+ _restype_ = self._func_restype_
+ self._FuncPtr = _FuncPtr
+
+ if handle is None:
+ self._handle = _dlopen(self._name, mode)
+ else:
+ self._handle = handle
+
+ def __repr__(self):
+ return "<%s '%s', handle %x at %#x>" % \
+ (self.__class__.__name__, self._name,
+ (self._handle & (_sys.maxsize*2 + 1)),
+ id(self) & (_sys.maxsize*2 + 1))
+
+ def __getattr__(self, name):
+ if name.startswith('__') and name.endswith('__'):
+ raise AttributeError(name)
+ func = self.__getitem__(name)
+ setattr(self, name, func)
+ return func
+
+ def __getitem__(self, name_or_ordinal):
+ func = self._FuncPtr((name_or_ordinal, self))
+ if not isinstance(name_or_ordinal, int):
+ func.__name__ = name_or_ordinal
+ return func
+
+class PyDLL(CDLL):
+ """This class represents the Python library itself. It allows
+ accessing Python API functions. The GIL is not released, and
+ Python exceptions are handled correctly.
+ """
+ _func_flags_ = _FUNCFLAG_CDECL | _FUNCFLAG_PYTHONAPI
+
+if _os.name == "nt":
+
+ class WinDLL(CDLL):
+ """This class represents a dll exporting functions using the
+ Windows stdcall calling convention.
+ """
+ _func_flags_ = _FUNCFLAG_STDCALL
+
+ # XXX Hm, what about HRESULT as normal parameter?
+ # Mustn't it derive from c_long then?
+ from _ctypes import _check_HRESULT, _SimpleCData
+ class HRESULT(_SimpleCData):
+ _type_ = "l"
+ # _check_retval_ is called with the function's result when it
+ # is used as restype. It checks for the FAILED bit, and
+ # raises an OSError if it is set.
+ #
+ # The _check_retval_ method is implemented in C, so that the
+ # method definition itself is not included in the traceback
+ # when it raises an error - that is what we want (and Python
+ # doesn't have a way to raise an exception in the caller's
+ # frame).
+ _check_retval_ = _check_HRESULT
+
+ class OleDLL(CDLL):
+ """This class represents a dll exporting functions using the
+ Windows stdcall calling convention, and returning HRESULT.
+ HRESULT error values are automatically raised as OSError
+ exceptions.
+ """
+ _func_flags_ = _FUNCFLAG_STDCALL
+ _func_restype_ = HRESULT
+
+class LibraryLoader(object):
+ def __init__(self, dlltype):
+ self._dlltype = dlltype
+
+ def __getattr__(self, name):
+ if name[0] == '_':
+ raise AttributeError(name)
+ dll = self._dlltype(name)
+ setattr(self, name, dll)
+ return dll
+
+ def __getitem__(self, name):
+ return getattr(self, name)
+
+ def LoadLibrary(self, name):
+ return self._dlltype(name)
+
+cdll = LibraryLoader(CDLL)
+pydll = LibraryLoader(PyDLL)
+
+if _os.name == "nt":
+ pythonapi = PyDLL("python dll", None, _sys.dllhandle)
+elif _sys.platform == "cygwin":
+ pythonapi = PyDLL("libpython%d.%d.dll" % _sys.version_info[:2])
+else:
+ pythonapi = PyDLL(None)
+
+
+if _os.name == "nt":
+ windll = LibraryLoader(WinDLL)
+ oledll = LibraryLoader(OleDLL)
+
+ GetLastError = windll.kernel32.GetLastError
+ from _ctypes import get_last_error, set_last_error
+
+ def WinError(code=None, descr=None):
+ if code is None:
+ code = GetLastError()
+ if descr is None:
+ descr = FormatError(code).strip()
+ return OSError(None, descr, None, code)
+
+if sizeof(c_uint) == sizeof(c_void_p):
+ c_size_t = c_uint
+ c_ssize_t = c_int
+elif sizeof(c_ulong) == sizeof(c_void_p):
+ c_size_t = c_ulong
+ c_ssize_t = c_long
+elif sizeof(c_ulonglong) == sizeof(c_void_p):
+ c_size_t = c_ulonglong
+ c_ssize_t = c_longlong
+
+# functions
+
+from _ctypes import _memmove_addr, _memset_addr, _string_at_addr, _cast_addr
+
+## void *memmove(void *, const void *, size_t);
+memmove = CFUNCTYPE(c_void_p, c_void_p, c_void_p, c_size_t)(_memmove_addr)
+
+## void *memset(void *, int, size_t)
+memset = CFUNCTYPE(c_void_p, c_void_p, c_int, c_size_t)(_memset_addr)
+
+def PYFUNCTYPE(restype, *argtypes):
+ class CFunctionType(_CFuncPtr):
+ _argtypes_ = argtypes
+ _restype_ = restype
+ _flags_ = _FUNCFLAG_CDECL | _FUNCFLAG_PYTHONAPI
+ return CFunctionType
+
+_cast = PYFUNCTYPE(py_object, c_void_p, py_object, py_object)(_cast_addr)
+def cast(obj, typ):
+ return _cast(obj, obj, typ)
+
+_string_at = PYFUNCTYPE(py_object, c_void_p, c_int)(_string_at_addr)
+def string_at(ptr, size=-1):
+ """string_at(addr[, size]) -> string
+
+ Return the string at addr."""
+ return _string_at(ptr, size)
+
+try:
+ from _ctypes import _wstring_at_addr
+except ImportError:
+ pass
+else:
+ _wstring_at = PYFUNCTYPE(py_object, c_void_p, c_int)(_wstring_at_addr)
+ def wstring_at(ptr, size=-1):
+ """wstring_at(addr[, size]) -> string
+
+ Return the string at addr."""
+ return _wstring_at(ptr, size)
+
+
+if _os.name == "nt": # COM stuff
+ def DllGetClassObject(rclsid, riid, ppv):
+ try:
+ ccom = __import__("comtypes.server.inprocserver", globals(), locals(), ['*'])
+ except ImportError:
+ return -2147221231 # CLASS_E_CLASSNOTAVAILABLE
+ else:
+ return ccom.DllGetClassObject(rclsid, riid, ppv)
+
+ def DllCanUnloadNow():
+ try:
+ ccom = __import__("comtypes.server.inprocserver", globals(), locals(), ['*'])
+ except ImportError:
+ return 0 # S_OK
+ return ccom.DllCanUnloadNow()
+
+from ctypes._endian import BigEndianStructure, LittleEndianStructure
+
+# Fill in specifically-sized types
+c_int8 = c_byte
+c_uint8 = c_ubyte
+for kind in [c_short, c_int, c_long, c_longlong]:
+ if sizeof(kind) == 2: c_int16 = kind
+ elif sizeof(kind) == 4: c_int32 = kind
+ elif sizeof(kind) == 8: c_int64 = kind
+for kind in [c_ushort, c_uint, c_ulong, c_ulonglong]:
+ if sizeof(kind) == 2: c_uint16 = kind
+ elif sizeof(kind) == 4: c_uint32 = kind
+ elif sizeof(kind) == 8: c_uint64 = kind
+del(kind)
+
+_reset_cache()
diff --git a/dist/lib/ctypes/__pycache__/__init__.cpython-38.opt-1.pyc b/dist/lib/ctypes/__pycache__/__init__.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..6888d81
Binary files /dev/null and b/dist/lib/ctypes/__pycache__/__init__.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/ctypes/__pycache__/_aix.cpython-38.opt-1.pyc b/dist/lib/ctypes/__pycache__/_aix.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..634b84b
Binary files /dev/null and b/dist/lib/ctypes/__pycache__/_aix.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/ctypes/__pycache__/_endian.cpython-38.opt-1.pyc b/dist/lib/ctypes/__pycache__/_endian.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..4313bda
Binary files /dev/null and b/dist/lib/ctypes/__pycache__/_endian.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/ctypes/__pycache__/util.cpython-38.opt-1.pyc b/dist/lib/ctypes/__pycache__/util.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..ee7411f
Binary files /dev/null and b/dist/lib/ctypes/__pycache__/util.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/ctypes/__pycache__/wintypes.cpython-38.opt-1.pyc b/dist/lib/ctypes/__pycache__/wintypes.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..f97ef6e
Binary files /dev/null and b/dist/lib/ctypes/__pycache__/wintypes.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/ctypes/_aix.py b/dist/lib/ctypes/_aix.py
new file mode 100644
index 0000000..190cac6
--- /dev/null
+++ b/dist/lib/ctypes/_aix.py
@@ -0,0 +1,331 @@
+"""
+Lib/ctypes.util.find_library() support for AIX
+Similar approach as done for Darwin support by using separate files
+but unlike Darwin - no extension such as ctypes.macholib.*
+
+dlopen() is an interface to AIX initAndLoad() - primary documentation at:
+https://www.ibm.com/support/knowledgecenter/en/ssw_aix_61/com.ibm.aix.basetrf1/dlopen.htm
+https://www.ibm.com/support/knowledgecenter/en/ssw_aix_61/com.ibm.aix.basetrf1/load.htm
+
+AIX supports two styles for dlopen(): svr4 (System V Release 4) which is common on posix
+platforms, but also a BSD style - aka SVR3.
+
+From AIX 5.3 Difference Addendum (December 2004)
+2.9 SVR4 linking affinity
+Nowadays, there are two major object file formats used by the operating systems:
+XCOFF: The COFF enhanced by IBM and others. The original COFF (Common
+Object File Format) was the base of SVR3 and BSD 4.2 systems.
+ELF: Executable and Linking Format that was developed by AT&T and is a
+base for SVR4 UNIX.
+
+While the shared library content is identical on AIX - one is located as a filepath name
+(svr4 style) and the other is located as a member of an archive (and the archive
+is located as a filepath name).
+
+The key difference arises when supporting multiple abi formats (i.e., 32 and 64 bit).
+For svr4 either only one ABI is supported, or there are two directories, or there
+are different file names. The most common solution for multiple ABI is multiple
+directories.
+
+For the XCOFF (aka AIX) style - one directory (one archive file) is sufficient
+as multiple shared libraries can be in the archive - even sharing the same name.
+In documentation the archive is also referred to as the "base" and the shared
+library object is referred to as the "member".
+
+For dlopen() on AIX (read initAndLoad()) the calls are similar.
+Default activity occurs when no path information is provided. When path
+information is provided dlopen() does not search any other directories.
+
+For SVR4 - the shared library name is the name of the file expected: libFOO.so
+For AIX - the shared library is expressed as base(member). The search is for the
+base (e.g., libFOO.a) and once the base is found the shared library - identified by
+member (e.g., libFOO.so, or shr.o) is located and loaded.
+
+The mode bit RTLD_MEMBER tells initAndLoad() that it needs to use the AIX (SVR3)
+naming style.
+"""
+__author__ = "Michael Felt "
+
+import re
+from os import environ, path
+from sys import executable
+from ctypes import c_void_p, sizeof
+from subprocess import Popen, PIPE, DEVNULL
+
+# Executable bit size - 32 or 64
+# Used to filter the search in an archive by size, e.g., -X64
+AIX_ABI = sizeof(c_void_p) * 8
+
+
+from sys import maxsize
+def _last_version(libnames, sep):
+ def _num_version(libname):
+ # "libxyz.so.MAJOR.MINOR" => [MAJOR, MINOR]
+ parts = libname.split(sep)
+ nums = []
+ try:
+ while parts:
+ nums.insert(0, int(parts.pop()))
+ except ValueError:
+ pass
+ return nums or [maxsize]
+ return max(reversed(libnames), key=_num_version)
+
+def get_ld_header(p):
+ # "nested-function, but placed at module level
+ ld_header = None
+ for line in p.stdout:
+ if line.startswith(('/', './', '../')):
+ ld_header = line
+ elif "INDEX" in line:
+ return ld_header.rstrip('\n')
+ return None
+
+def get_ld_header_info(p):
+ # "nested-function, but placed at module level
+ # as an ld_header was found, return known paths, archives and members
+ # these lines start with a digit
+ info = []
+ for line in p.stdout:
+ if re.match("[0-9]", line):
+ info.append(line)
+ else:
+ # blank line (separator), consume line and end for loop
+ break
+ return info
+
+def get_ld_headers(file):
+ """
+ Parse the header of the loader section of executable and archives
+ This function calls /usr/bin/dump -H as a subprocess
+ and returns a list of (ld_header, ld_header_info) tuples.
+ """
+ # get_ld_headers parsing:
+ # 1. Find a line that starts with /, ./, or ../ - set as ld_header
+ # 2. If "INDEX" in occurs in a following line - return ld_header
+ # 3. get info (lines starting with [0-9])
+ ldr_headers = []
+ p = Popen(["/usr/bin/dump", f"-X{AIX_ABI}", "-H", file],
+ universal_newlines=True, stdout=PIPE, stderr=DEVNULL)
+ # be sure to read to the end-of-file - getting all entries
+ while True:
+ ld_header = get_ld_header(p)
+ if ld_header:
+ ldr_headers.append((ld_header, get_ld_header_info(p)))
+ else:
+ break
+ p.stdout.close()
+ p.wait()
+ return ldr_headers
+
+def get_shared(ld_headers):
+ """
+ extract the shareable objects from ld_headers
+ character "[" is used to strip off the path information.
+ Note: the "[" and "]" characters that are part of dump -H output
+ are not removed here.
+ """
+ shared = []
+ for (line, _) in ld_headers:
+ # potential member lines contain "["
+ # otherwise, no processing needed
+ if "[" in line:
+ # Strip off trailing colon (:)
+ shared.append(line[line.index("["):-1])
+ return shared
+
+def get_one_match(expr, lines):
+ """
+ Must be only one match, otherwise result is None.
+ When there is a match, strip leading "[" and trailing "]"
+ """
+ # member names in the ld_headers output are between square brackets
+ expr = rf'\[({expr})\]'
+ matches = list(filter(None, (re.search(expr, line) for line in lines)))
+ if len(matches) == 1:
+ return matches[0].group(1)
+ else:
+ return None
+
+# additional processing to deal with AIX legacy names for 64-bit members
+def get_legacy(members):
+ """
+ This routine provides historical aka legacy naming schemes started
+ in AIX4 shared library support for library members names.
+ e.g., in /usr/lib/libc.a the member name shr.o for 32-bit binary and
+ shr_64.o for 64-bit binary.
+ """
+ if AIX_ABI == 64:
+ # AIX 64-bit member is one of shr64.o, shr_64.o, or shr4_64.o
+ expr = r'shr4?_?64\.o'
+ member = get_one_match(expr, members)
+ if member:
+ return member
+ else:
+ # 32-bit legacy names - both shr.o and shr4.o exist.
+ # shr.o is the preffered name so we look for shr.o first
+ # i.e., shr4.o is returned only when shr.o does not exist
+ for name in ['shr.o', 'shr4.o']:
+ member = get_one_match(re.escape(name), members)
+ if member:
+ return member
+ return None
+
+def get_version(name, members):
+ """
+ Sort list of members and return highest numbered version - if it exists.
+ This function is called when an unversioned libFOO.a(libFOO.so) has
+ not been found.
+
+ Versioning for the member name is expected to follow
+ GNU LIBTOOL conventions: the highest version (x, then X.y, then X.Y.z)
+ * find [libFoo.so.X]
+ * find [libFoo.so.X.Y]
+ * find [libFoo.so.X.Y.Z]
+
+ Before the GNU convention became the standard scheme regardless of
+ binary size AIX packagers used GNU convention "as-is" for 32-bit
+ archive members but used an "distinguishing" name for 64-bit members.
+ This scheme inserted either 64 or _64 between libFOO and .so
+ - generally libFOO_64.so, but occasionally libFOO64.so
+ """
+ # the expression ending for versions must start as
+ # '.so.[0-9]', i.e., *.so.[at least one digit]
+ # while multiple, more specific expressions could be specified
+ # to search for .so.X, .so.X.Y and .so.X.Y.Z
+ # after the first required 'dot' digit
+ # any combination of additional 'dot' digits pairs are accepted
+ # anything more than libFOO.so.digits.digits.digits
+ # should be seen as a member name outside normal expectations
+ exprs = [rf'lib{name}\.so\.[0-9]+[0-9.]*',
+ rf'lib{name}_?64\.so\.[0-9]+[0-9.]*']
+ for expr in exprs:
+ versions = []
+ for line in members:
+ m = re.search(expr, line)
+ if m:
+ versions.append(m.group(0))
+ if versions:
+ return _last_version(versions, '.')
+ return None
+
+def get_member(name, members):
+ """
+ Return an archive member matching the request in name.
+ Name is the library name without any prefix like lib, suffix like .so,
+ or version number.
+ Given a list of members find and return the most appropriate result
+ Priority is given to generic libXXX.so, then a versioned libXXX.so.a.b.c
+ and finally, legacy AIX naming scheme.
+ """
+ # look first for a generic match - prepend lib and append .so
+ expr = rf'lib{name}\.so'
+ member = get_one_match(expr, members)
+ if member:
+ return member
+ elif AIX_ABI == 64:
+ expr = rf'lib{name}64\.so'
+ member = get_one_match(expr, members)
+ if member:
+ return member
+ # since an exact match with .so as suffix was not found
+ # look for a versioned name
+ # If a versioned name is not found, look for AIX legacy member name
+ member = get_version(name, members)
+ if member:
+ return member
+ else:
+ return get_legacy(members)
+
+def get_libpaths():
+ """
+ On AIX, the buildtime searchpath is stored in the executable.
+ as "loader header information".
+ The command /usr/bin/dump -H extracts this info.
+ Prefix searched libraries with LD_LIBRARY_PATH (preferred),
+ or LIBPATH if defined. These paths are appended to the paths
+ to libraries the python executable is linked with.
+ This mimics AIX dlopen() behavior.
+ """
+ libpaths = environ.get("LD_LIBRARY_PATH")
+ if libpaths is None:
+ libpaths = environ.get("LIBPATH")
+ if libpaths is None:
+ libpaths = []
+ else:
+ libpaths = libpaths.split(":")
+ objects = get_ld_headers(executable)
+ for (_, lines) in objects:
+ for line in lines:
+ # the second (optional) argument is PATH if it includes a /
+ path = line.split()[1]
+ if "/" in path:
+ libpaths.extend(path.split(":"))
+ return libpaths
+
+def find_shared(paths, name):
+ """
+ paths is a list of directories to search for an archive.
+ name is the abbreviated name given to find_library().
+ Process: search "paths" for archive, and if an archive is found
+ return the result of get_member().
+ If an archive is not found then return None
+ """
+ for dir in paths:
+ # /lib is a symbolic link to /usr/lib, skip it
+ if dir == "/lib":
+ continue
+ # "lib" is prefixed to emulate compiler name resolution,
+ # e.g., -lc to libc
+ base = f'lib{name}.a'
+ archive = path.join(dir, base)
+ if path.exists(archive):
+ members = get_shared(get_ld_headers(archive))
+ member = get_member(re.escape(name), members)
+ if member != None:
+ return (base, member)
+ else:
+ return (None, None)
+ return (None, None)
+
+def find_library(name):
+ """AIX implementation of ctypes.util.find_library()
+ Find an archive member that will dlopen(). If not available,
+ also search for a file (or link) with a .so suffix.
+
+ AIX supports two types of schemes that can be used with dlopen().
+ The so-called SystemV Release4 (svr4) format is commonly suffixed
+ with .so while the (default) AIX scheme has the library (archive)
+ ending with the suffix .a
+ As an archive has multiple members (e.g., 32-bit and 64-bit) in one file
+ the argument passed to dlopen must include both the library and
+ the member names in a single string.
+
+ find_library() looks first for an archive (.a) with a suitable member.
+ If no archive+member pair is found, look for a .so file.
+ """
+
+ libpaths = get_libpaths()
+ (base, member) = find_shared(libpaths, name)
+ if base != None:
+ return f"{base}({member})"
+
+ # To get here, a member in an archive has not been found
+ # In other words, either:
+ # a) a .a file was not found
+ # b) a .a file did not have a suitable member
+ # So, look for a .so file
+ # Check libpaths for .so file
+ # Note, the installation must prepare a link from a .so
+ # to a versioned file
+ # This is common practice by GNU libtool on other platforms
+ soname = f"lib{name}.so"
+ for dir in libpaths:
+ # /lib is a symbolic link to /usr/lib, skip it
+ if dir == "/lib":
+ continue
+ shlib = path.join(dir, soname)
+ if path.exists(shlib):
+ return soname
+ # if we are here, we have not found anything plausible
+ return None
diff --git a/dist/lib/ctypes/_endian.py b/dist/lib/ctypes/_endian.py
new file mode 100644
index 0000000..37444bd
--- /dev/null
+++ b/dist/lib/ctypes/_endian.py
@@ -0,0 +1,61 @@
+import sys
+from ctypes import *
+
+_array_type = type(Array)
+
+def _other_endian(typ):
+ """Return the type with the 'other' byte order. Simple types like
+ c_int and so on already have __ctype_be__ and __ctype_le__
+ attributes which contain the types, for more complicated types
+ arrays and structures are supported.
+ """
+ # check _OTHER_ENDIAN attribute (present if typ is primitive type)
+ if hasattr(typ, _OTHER_ENDIAN):
+ return getattr(typ, _OTHER_ENDIAN)
+ # if typ is array
+ if isinstance(typ, _array_type):
+ return _other_endian(typ._type_) * typ._length_
+ # if typ is structure
+ if issubclass(typ, Structure):
+ return typ
+ raise TypeError("This type does not support other endian: %s" % typ)
+
+class _swapped_meta(type(Structure)):
+ def __setattr__(self, attrname, value):
+ if attrname == "_fields_":
+ fields = []
+ for desc in value:
+ name = desc[0]
+ typ = desc[1]
+ rest = desc[2:]
+ fields.append((name, _other_endian(typ)) + rest)
+ value = fields
+ super().__setattr__(attrname, value)
+
+################################################################
+
+# Note: The Structure metaclass checks for the *presence* (not the
+# value!) of a _swapped_bytes_ attribute to determine the bit order in
+# structures containing bit fields.
+
+if sys.byteorder == "little":
+ _OTHER_ENDIAN = "__ctype_be__"
+
+ LittleEndianStructure = Structure
+
+ class BigEndianStructure(Structure, metaclass=_swapped_meta):
+ """Structure with big endian byte order"""
+ __slots__ = ()
+ _swappedbytes_ = None
+
+elif sys.byteorder == "big":
+ _OTHER_ENDIAN = "__ctype_le__"
+
+ BigEndianStructure = Structure
+ class LittleEndianStructure(Structure, metaclass=_swapped_meta):
+ """Structure with little endian byte order"""
+ __slots__ = ()
+ _swappedbytes_ = None
+
+else:
+ raise RuntimeError("Invalid byteorder")
diff --git a/dist/lib/ctypes/macholib/__init__.py b/dist/lib/ctypes/macholib/__init__.py
new file mode 100644
index 0000000..5621def
--- /dev/null
+++ b/dist/lib/ctypes/macholib/__init__.py
@@ -0,0 +1,9 @@
+"""
+Enough Mach-O to make your head spin.
+
+See the relevant header files in /usr/include/mach-o
+
+And also Apple's documentation.
+"""
+
+__version__ = '1.0'
diff --git a/dist/lib/ctypes/macholib/__pycache__/__init__.cpython-38.opt-1.pyc b/dist/lib/ctypes/macholib/__pycache__/__init__.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..66903a2
Binary files /dev/null and b/dist/lib/ctypes/macholib/__pycache__/__init__.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/ctypes/macholib/__pycache__/dyld.cpython-38.opt-1.pyc b/dist/lib/ctypes/macholib/__pycache__/dyld.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..ffe3540
Binary files /dev/null and b/dist/lib/ctypes/macholib/__pycache__/dyld.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/ctypes/macholib/__pycache__/dylib.cpython-38.opt-1.pyc b/dist/lib/ctypes/macholib/__pycache__/dylib.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..f2c9802
Binary files /dev/null and b/dist/lib/ctypes/macholib/__pycache__/dylib.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/ctypes/macholib/__pycache__/framework.cpython-38.opt-1.pyc b/dist/lib/ctypes/macholib/__pycache__/framework.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..740fc67
Binary files /dev/null and b/dist/lib/ctypes/macholib/__pycache__/framework.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/ctypes/macholib/dyld.py b/dist/lib/ctypes/macholib/dyld.py
new file mode 100644
index 0000000..9d86b05
--- /dev/null
+++ b/dist/lib/ctypes/macholib/dyld.py
@@ -0,0 +1,161 @@
+"""
+dyld emulation
+"""
+
+import os
+from ctypes.macholib.framework import framework_info
+from ctypes.macholib.dylib import dylib_info
+from itertools import *
+
+__all__ = [
+ 'dyld_find', 'framework_find',
+ 'framework_info', 'dylib_info',
+]
+
+# These are the defaults as per man dyld(1)
+#
+DEFAULT_FRAMEWORK_FALLBACK = [
+ os.path.expanduser("~/Library/Frameworks"),
+ "/Library/Frameworks",
+ "/Network/Library/Frameworks",
+ "/System/Library/Frameworks",
+]
+
+DEFAULT_LIBRARY_FALLBACK = [
+ os.path.expanduser("~/lib"),
+ "/usr/local/lib",
+ "/lib",
+ "/usr/lib",
+]
+
+def dyld_env(env, var):
+ if env is None:
+ env = os.environ
+ rval = env.get(var)
+ if rval is None:
+ return []
+ return rval.split(':')
+
+def dyld_image_suffix(env=None):
+ if env is None:
+ env = os.environ
+ return env.get('DYLD_IMAGE_SUFFIX')
+
+def dyld_framework_path(env=None):
+ return dyld_env(env, 'DYLD_FRAMEWORK_PATH')
+
+def dyld_library_path(env=None):
+ return dyld_env(env, 'DYLD_LIBRARY_PATH')
+
+def dyld_fallback_framework_path(env=None):
+ return dyld_env(env, 'DYLD_FALLBACK_FRAMEWORK_PATH')
+
+def dyld_fallback_library_path(env=None):
+ return dyld_env(env, 'DYLD_FALLBACK_LIBRARY_PATH')
+
+def dyld_image_suffix_search(iterator, env=None):
+ """For a potential path iterator, add DYLD_IMAGE_SUFFIX semantics"""
+ suffix = dyld_image_suffix(env)
+ if suffix is None:
+ return iterator
+ def _inject(iterator=iterator, suffix=suffix):
+ for path in iterator:
+ if path.endswith('.dylib'):
+ yield path[:-len('.dylib')] + suffix + '.dylib'
+ else:
+ yield path + suffix
+ yield path
+ return _inject()
+
+def dyld_override_search(name, env=None):
+ # If DYLD_FRAMEWORK_PATH is set and this dylib_name is a
+ # framework name, use the first file that exists in the framework
+ # path if any. If there is none go on to search the DYLD_LIBRARY_PATH
+ # if any.
+
+ framework = framework_info(name)
+
+ if framework is not None:
+ for path in dyld_framework_path(env):
+ yield os.path.join(path, framework['name'])
+
+ # If DYLD_LIBRARY_PATH is set then use the first file that exists
+ # in the path. If none use the original name.
+ for path in dyld_library_path(env):
+ yield os.path.join(path, os.path.basename(name))
+
+def dyld_executable_path_search(name, executable_path=None):
+ # If we haven't done any searching and found a library and the
+ # dylib_name starts with "@executable_path/" then construct the
+ # library name.
+ if name.startswith('@executable_path/') and executable_path is not None:
+ yield os.path.join(executable_path, name[len('@executable_path/'):])
+
+def dyld_default_search(name, env=None):
+ yield name
+
+ framework = framework_info(name)
+
+ if framework is not None:
+ fallback_framework_path = dyld_fallback_framework_path(env)
+ for path in fallback_framework_path:
+ yield os.path.join(path, framework['name'])
+
+ fallback_library_path = dyld_fallback_library_path(env)
+ for path in fallback_library_path:
+ yield os.path.join(path, os.path.basename(name))
+
+ if framework is not None and not fallback_framework_path:
+ for path in DEFAULT_FRAMEWORK_FALLBACK:
+ yield os.path.join(path, framework['name'])
+
+ if not fallback_library_path:
+ for path in DEFAULT_LIBRARY_FALLBACK:
+ yield os.path.join(path, os.path.basename(name))
+
+def dyld_find(name, executable_path=None, env=None):
+ """
+ Find a library or framework using dyld semantics
+ """
+ for path in dyld_image_suffix_search(chain(
+ dyld_override_search(name, env),
+ dyld_executable_path_search(name, executable_path),
+ dyld_default_search(name, env),
+ ), env):
+ if os.path.isfile(path):
+ return path
+ raise ValueError("dylib %s could not be found" % (name,))
+
+def framework_find(fn, executable_path=None, env=None):
+ """
+ Find a framework using dyld semantics in a very loose manner.
+
+ Will take input such as:
+ Python
+ Python.framework
+ Python.framework/Versions/Current
+ """
+ error = None
+ try:
+ return dyld_find(fn, executable_path=executable_path, env=env)
+ except ValueError as e:
+ error = e
+ fmwk_index = fn.rfind('.framework')
+ if fmwk_index == -1:
+ fmwk_index = len(fn)
+ fn += '.framework'
+ fn = os.path.join(fn, os.path.basename(fn[:fmwk_index]))
+ try:
+ return dyld_find(fn, executable_path=executable_path, env=env)
+ except ValueError:
+ raise error
+ finally:
+ error = None
+
+def test_dyld_find():
+ env = {}
+ assert dyld_find('libSystem.dylib') == '/usr/lib/libSystem.dylib'
+ assert dyld_find('System.framework/System') == '/System/Library/Frameworks/System.framework/System'
+
+if __name__ == '__main__':
+ test_dyld_find()
diff --git a/dist/lib/ctypes/macholib/dylib.py b/dist/lib/ctypes/macholib/dylib.py
new file mode 100644
index 0000000..aa10750
--- /dev/null
+++ b/dist/lib/ctypes/macholib/dylib.py
@@ -0,0 +1,63 @@
+"""
+Generic dylib path manipulation
+"""
+
+import re
+
+__all__ = ['dylib_info']
+
+DYLIB_RE = re.compile(r"""(?x)
+(?P^.*)(?:^|/)
+(?P
+ (?P\w+?)
+ (?:\.(?P[^._]+))?
+ (?:_(?P[^._]+))?
+ \.dylib$
+)
+""")
+
+def dylib_info(filename):
+ """
+ A dylib name can take one of the following four forms:
+ Location/Name.SomeVersion_Suffix.dylib
+ Location/Name.SomeVersion.dylib
+ Location/Name_Suffix.dylib
+ Location/Name.dylib
+
+ returns None if not found or a mapping equivalent to:
+ dict(
+ location='Location',
+ name='Name.SomeVersion_Suffix.dylib',
+ shortname='Name',
+ version='SomeVersion',
+ suffix='Suffix',
+ )
+
+ Note that SomeVersion and Suffix are optional and may be None
+ if not present.
+ """
+ is_dylib = DYLIB_RE.match(filename)
+ if not is_dylib:
+ return None
+ return is_dylib.groupdict()
+
+
+def test_dylib_info():
+ def d(location=None, name=None, shortname=None, version=None, suffix=None):
+ return dict(
+ location=location,
+ name=name,
+ shortname=shortname,
+ version=version,
+ suffix=suffix
+ )
+ assert dylib_info('completely/invalid') is None
+ assert dylib_info('completely/invalide_debug') is None
+ assert dylib_info('P/Foo.dylib') == d('P', 'Foo.dylib', 'Foo')
+ assert dylib_info('P/Foo_debug.dylib') == d('P', 'Foo_debug.dylib', 'Foo', suffix='debug')
+ assert dylib_info('P/Foo.A.dylib') == d('P', 'Foo.A.dylib', 'Foo', 'A')
+ assert dylib_info('P/Foo_debug.A.dylib') == d('P', 'Foo_debug.A.dylib', 'Foo_debug', 'A')
+ assert dylib_info('P/Foo.A_debug.dylib') == d('P', 'Foo.A_debug.dylib', 'Foo', 'A', 'debug')
+
+if __name__ == '__main__':
+ test_dylib_info()
diff --git a/dist/lib/ctypes/macholib/framework.py b/dist/lib/ctypes/macholib/framework.py
new file mode 100644
index 0000000..ad6ed55
--- /dev/null
+++ b/dist/lib/ctypes/macholib/framework.py
@@ -0,0 +1,65 @@
+"""
+Generic framework path manipulation
+"""
+
+import re
+
+__all__ = ['framework_info']
+
+STRICT_FRAMEWORK_RE = re.compile(r"""(?x)
+(?P^.*)(?:^|/)
+(?P
+ (?P\w+).framework/
+ (?:Versions/(?P[^/]+)/)?
+ (?P=shortname)
+ (?:_(?P[^_]+))?
+)$
+""")
+
+def framework_info(filename):
+ """
+ A framework name can take one of the following four forms:
+ Location/Name.framework/Versions/SomeVersion/Name_Suffix
+ Location/Name.framework/Versions/SomeVersion/Name
+ Location/Name.framework/Name_Suffix
+ Location/Name.framework/Name
+
+ returns None if not found, or a mapping equivalent to:
+ dict(
+ location='Location',
+ name='Name.framework/Versions/SomeVersion/Name_Suffix',
+ shortname='Name',
+ version='SomeVersion',
+ suffix='Suffix',
+ )
+
+ Note that SomeVersion and Suffix are optional and may be None
+ if not present
+ """
+ is_framework = STRICT_FRAMEWORK_RE.match(filename)
+ if not is_framework:
+ return None
+ return is_framework.groupdict()
+
+def test_framework_info():
+ def d(location=None, name=None, shortname=None, version=None, suffix=None):
+ return dict(
+ location=location,
+ name=name,
+ shortname=shortname,
+ version=version,
+ suffix=suffix
+ )
+ assert framework_info('completely/invalid') is None
+ assert framework_info('completely/invalid/_debug') is None
+ assert framework_info('P/F.framework') is None
+ assert framework_info('P/F.framework/_debug') is None
+ assert framework_info('P/F.framework/F') == d('P', 'F.framework/F', 'F')
+ assert framework_info('P/F.framework/F_debug') == d('P', 'F.framework/F_debug', 'F', suffix='debug')
+ assert framework_info('P/F.framework/Versions') is None
+ assert framework_info('P/F.framework/Versions/A') is None
+ assert framework_info('P/F.framework/Versions/A/F') == d('P', 'F.framework/Versions/A/F', 'F', 'A')
+ assert framework_info('P/F.framework/Versions/A/F_debug') == d('P', 'F.framework/Versions/A/F_debug', 'F', 'A', 'debug')
+
+if __name__ == '__main__':
+ test_framework_info()
diff --git a/dist/lib/ctypes/util.py b/dist/lib/ctypes/util.py
new file mode 100644
index 0000000..97973bc
--- /dev/null
+++ b/dist/lib/ctypes/util.py
@@ -0,0 +1,358 @@
+import os
+import shutil
+import subprocess
+import sys
+
+# find_library(name) returns the pathname of a library, or None.
+if os.name == "nt":
+
+ def _get_build_version():
+ """Return the version of MSVC that was used to build Python.
+
+ For Python 2.3 and up, the version number is included in
+ sys.version. For earlier versions, assume the compiler is MSVC 6.
+ """
+ # This function was copied from Lib/distutils/msvccompiler.py
+ prefix = "MSC v."
+ i = sys.version.find(prefix)
+ if i == -1:
+ return 6
+ i = i + len(prefix)
+ s, rest = sys.version[i:].split(" ", 1)
+ majorVersion = int(s[:-2]) - 6
+ if majorVersion >= 13:
+ majorVersion += 1
+ minorVersion = int(s[2:3]) / 10.0
+ # I don't think paths are affected by minor version in version 6
+ if majorVersion == 6:
+ minorVersion = 0
+ if majorVersion >= 6:
+ return majorVersion + minorVersion
+ # else we don't know what version of the compiler this is
+ return None
+
+ def find_msvcrt():
+ """Return the name of the VC runtime dll"""
+ version = _get_build_version()
+ if version is None:
+ # better be safe than sorry
+ return None
+ if version <= 6:
+ clibname = 'msvcrt'
+ elif version <= 13:
+ clibname = 'msvcr%d' % (version * 10)
+ else:
+ # CRT is no longer directly loadable. See issue23606 for the
+ # discussion about alternative approaches.
+ return None
+
+ # If python was built with in debug mode
+ import importlib.machinery
+ if '_d.pyd' in importlib.machinery.EXTENSION_SUFFIXES:
+ clibname += 'd'
+ return clibname+'.dll'
+
+ def find_library(name):
+ if name in ('c', 'm'):
+ return find_msvcrt()
+ # See MSDN for the REAL search order.
+ for directory in os.environ['PATH'].split(os.pathsep):
+ fname = os.path.join(directory, name)
+ if os.path.isfile(fname):
+ return fname
+ if fname.lower().endswith(".dll"):
+ continue
+ fname = fname + ".dll"
+ if os.path.isfile(fname):
+ return fname
+ return None
+
+elif os.name == "posix" and sys.platform == "darwin":
+ from ctypes.macholib.dyld import dyld_find as _dyld_find
+ def find_library(name):
+ possible = ['lib%s.dylib' % name,
+ '%s.dylib' % name,
+ '%s.framework/%s' % (name, name)]
+ for name in possible:
+ try:
+ return _dyld_find(name)
+ except ValueError:
+ continue
+ return None
+
+elif sys.platform.startswith("aix"):
+ # AIX has two styles of storing shared libraries
+ # GNU auto_tools refer to these as svr4 and aix
+ # svr4 (System V Release 4) is a regular file, often with .so as suffix
+ # AIX style uses an archive (suffix .a) with members (e.g., shr.o, libssl.so)
+ # see issue#26439 and _aix.py for more details
+
+ from ctypes._aix import find_library
+
+elif os.name == "posix":
+ # Andreas Degert's find functions, using gcc, /sbin/ldconfig, objdump
+ import re, tempfile
+
+ def _findLib_gcc(name):
+ # Run GCC's linker with the -t (aka --trace) option and examine the
+ # library name it prints out. The GCC command will fail because we
+ # haven't supplied a proper program with main(), but that does not
+ # matter.
+ expr = os.fsencode(r'[^\(\)\s]*lib%s\.[^\(\)\s]*' % re.escape(name))
+
+ c_compiler = shutil.which('gcc')
+ if not c_compiler:
+ c_compiler = shutil.which('cc')
+ if not c_compiler:
+ # No C compiler available, give up
+ return None
+
+ temp = tempfile.NamedTemporaryFile()
+ try:
+ args = [c_compiler, '-Wl,-t', '-o', temp.name, '-l' + name]
+
+ env = dict(os.environ)
+ env['LC_ALL'] = 'C'
+ env['LANG'] = 'C'
+ try:
+ proc = subprocess.Popen(args,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ env=env)
+ except OSError: # E.g. bad executable
+ return None
+ with proc:
+ trace = proc.stdout.read()
+ finally:
+ try:
+ temp.close()
+ except FileNotFoundError:
+ # Raised if the file was already removed, which is the normal
+ # behaviour of GCC if linking fails
+ pass
+ res = re.search(expr, trace)
+ if not res:
+ return None
+ return os.fsdecode(res.group(0))
+
+
+ if sys.platform == "sunos5":
+ # use /usr/ccs/bin/dump on solaris
+ def _get_soname(f):
+ if not f:
+ return None
+
+ try:
+ proc = subprocess.Popen(("/usr/ccs/bin/dump", "-Lpv", f),
+ stdout=subprocess.PIPE,
+ stderr=subprocess.DEVNULL)
+ except OSError: # E.g. command not found
+ return None
+ with proc:
+ data = proc.stdout.read()
+ res = re.search(br'\[.*\]\sSONAME\s+([^\s]+)', data)
+ if not res:
+ return None
+ return os.fsdecode(res.group(1))
+ else:
+ def _get_soname(f):
+ # assuming GNU binutils / ELF
+ if not f:
+ return None
+ objdump = shutil.which('objdump')
+ if not objdump:
+ # objdump is not available, give up
+ return None
+
+ try:
+ proc = subprocess.Popen((objdump, '-p', '-j', '.dynamic', f),
+ stdout=subprocess.PIPE,
+ stderr=subprocess.DEVNULL)
+ except OSError: # E.g. bad executable
+ return None
+ with proc:
+ dump = proc.stdout.read()
+ res = re.search(br'\sSONAME\s+([^\s]+)', dump)
+ if not res:
+ return None
+ return os.fsdecode(res.group(1))
+
+ if sys.platform.startswith(("freebsd", "openbsd", "dragonfly")):
+
+ def _num_version(libname):
+ # "libxyz.so.MAJOR.MINOR" => [ MAJOR, MINOR ]
+ parts = libname.split(b".")
+ nums = []
+ try:
+ while parts:
+ nums.insert(0, int(parts.pop()))
+ except ValueError:
+ pass
+ return nums or [sys.maxsize]
+
+ def find_library(name):
+ ename = re.escape(name)
+ expr = r':-l%s\.\S+ => \S*/(lib%s\.\S+)' % (ename, ename)
+ expr = os.fsencode(expr)
+
+ try:
+ proc = subprocess.Popen(('/sbin/ldconfig', '-r'),
+ stdout=subprocess.PIPE,
+ stderr=subprocess.DEVNULL)
+ except OSError: # E.g. command not found
+ data = b''
+ else:
+ with proc:
+ data = proc.stdout.read()
+
+ res = re.findall(expr, data)
+ if not res:
+ return _get_soname(_findLib_gcc(name))
+ res.sort(key=_num_version)
+ return os.fsdecode(res[-1])
+
+ elif sys.platform == "sunos5":
+
+ def _findLib_crle(name, is64):
+ if not os.path.exists('/usr/bin/crle'):
+ return None
+
+ env = dict(os.environ)
+ env['LC_ALL'] = 'C'
+
+ if is64:
+ args = ('/usr/bin/crle', '-64')
+ else:
+ args = ('/usr/bin/crle',)
+
+ paths = None
+ try:
+ proc = subprocess.Popen(args,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.DEVNULL,
+ env=env)
+ except OSError: # E.g. bad executable
+ return None
+ with proc:
+ for line in proc.stdout:
+ line = line.strip()
+ if line.startswith(b'Default Library Path (ELF):'):
+ paths = os.fsdecode(line).split()[4]
+
+ if not paths:
+ return None
+
+ for dir in paths.split(":"):
+ libfile = os.path.join(dir, "lib%s.so" % name)
+ if os.path.exists(libfile):
+ return libfile
+
+ return None
+
+ def find_library(name, is64 = False):
+ return _get_soname(_findLib_crle(name, is64) or _findLib_gcc(name))
+
+ else:
+
+ def _findSoname_ldconfig(name):
+ import struct
+ if struct.calcsize('l') == 4:
+ machine = os.uname().machine + '-32'
+ else:
+ machine = os.uname().machine + '-64'
+ mach_map = {
+ 'x86_64-64': 'libc6,x86-64',
+ 'ppc64-64': 'libc6,64bit',
+ 'sparc64-64': 'libc6,64bit',
+ 's390x-64': 'libc6,64bit',
+ 'ia64-64': 'libc6,IA-64',
+ }
+ abi_type = mach_map.get(machine, 'libc6')
+
+ # XXX assuming GLIBC's ldconfig (with option -p)
+ regex = r'\s+(lib%s\.[^\s]+)\s+\(%s'
+ regex = os.fsencode(regex % (re.escape(name), abi_type))
+ try:
+ with subprocess.Popen(['/sbin/ldconfig', '-p'],
+ stdin=subprocess.DEVNULL,
+ stderr=subprocess.DEVNULL,
+ stdout=subprocess.PIPE,
+ env={'LC_ALL': 'C', 'LANG': 'C'}) as p:
+ res = re.search(regex, p.stdout.read())
+ if res:
+ return os.fsdecode(res.group(1))
+ except OSError:
+ pass
+
+ def _findLib_ld(name):
+ # See issue #9998 for why this is needed
+ expr = r'[^\(\)\s]*lib%s\.[^\(\)\s]*' % re.escape(name)
+ cmd = ['ld', '-t']
+ libpath = os.environ.get('LD_LIBRARY_PATH')
+ if libpath:
+ for d in libpath.split(':'):
+ cmd.extend(['-L', d])
+ cmd.extend(['-o', os.devnull, '-l%s' % name])
+ result = None
+ try:
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ universal_newlines=True)
+ out, _ = p.communicate()
+ res = re.search(expr, os.fsdecode(out))
+ if res:
+ result = res.group(0)
+ except Exception as e:
+ pass # result will be None
+ return result
+
+ def find_library(name):
+ # See issue #9998
+ return _findSoname_ldconfig(name) or \
+ _get_soname(_findLib_gcc(name) or _findLib_ld(name))
+
+################################################################
+# test code
+
+def test():
+ from ctypes import cdll
+ if os.name == "nt":
+ print(cdll.msvcrt)
+ print(cdll.load("msvcrt"))
+ print(find_library("msvcrt"))
+
+ if os.name == "posix":
+ # find and load_version
+ print(find_library("m"))
+ print(find_library("c"))
+ print(find_library("bz2"))
+
+ # load
+ if sys.platform == "darwin":
+ print(cdll.LoadLibrary("libm.dylib"))
+ print(cdll.LoadLibrary("libcrypto.dylib"))
+ print(cdll.LoadLibrary("libSystem.dylib"))
+ print(cdll.LoadLibrary("System.framework/System"))
+ # issue-26439 - fix broken test call for AIX
+ elif sys.platform.startswith("aix"):
+ from ctypes import CDLL
+ if sys.maxsize < 2**32:
+ print(f"Using CDLL(name, os.RTLD_MEMBER): {CDLL('libc.a(shr.o)', os.RTLD_MEMBER)}")
+ print(f"Using cdll.LoadLibrary(): {cdll.LoadLibrary('libc.a(shr.o)')}")
+ # librpm.so is only available as 32-bit shared library
+ print(find_library("rpm"))
+ print(cdll.LoadLibrary("librpm.so"))
+ else:
+ print(f"Using CDLL(name, os.RTLD_MEMBER): {CDLL('libc.a(shr_64.o)', os.RTLD_MEMBER)}")
+ print(f"Using cdll.LoadLibrary(): {cdll.LoadLibrary('libc.a(shr_64.o)')}")
+ print(f"crypt\t:: {find_library('crypt')}")
+ print(f"crypt\t:: {cdll.LoadLibrary(find_library('crypt'))}")
+ print(f"crypto\t:: {find_library('crypto')}")
+ print(f"crypto\t:: {cdll.LoadLibrary(find_library('crypto'))}")
+ else:
+ print(cdll.LoadLibrary("libm.so"))
+ print(cdll.LoadLibrary("libcrypt.so"))
+ print(find_library("crypt"))
+
+if __name__ == "__main__":
+ test()
diff --git a/dist/lib/ctypes/wintypes.py b/dist/lib/ctypes/wintypes.py
new file mode 100644
index 0000000..c619d27
--- /dev/null
+++ b/dist/lib/ctypes/wintypes.py
@@ -0,0 +1,202 @@
+# The most useful windows datatypes
+import ctypes
+
+BYTE = ctypes.c_byte
+WORD = ctypes.c_ushort
+DWORD = ctypes.c_ulong
+
+#UCHAR = ctypes.c_uchar
+CHAR = ctypes.c_char
+WCHAR = ctypes.c_wchar
+UINT = ctypes.c_uint
+INT = ctypes.c_int
+
+DOUBLE = ctypes.c_double
+FLOAT = ctypes.c_float
+
+BOOLEAN = BYTE
+BOOL = ctypes.c_long
+
+class VARIANT_BOOL(ctypes._SimpleCData):
+ _type_ = "v"
+ def __repr__(self):
+ return "%s(%r)" % (self.__class__.__name__, self.value)
+
+ULONG = ctypes.c_ulong
+LONG = ctypes.c_long
+
+USHORT = ctypes.c_ushort
+SHORT = ctypes.c_short
+
+# in the windows header files, these are structures.
+_LARGE_INTEGER = LARGE_INTEGER = ctypes.c_longlong
+_ULARGE_INTEGER = ULARGE_INTEGER = ctypes.c_ulonglong
+
+LPCOLESTR = LPOLESTR = OLESTR = ctypes.c_wchar_p
+LPCWSTR = LPWSTR = ctypes.c_wchar_p
+LPCSTR = LPSTR = ctypes.c_char_p
+LPCVOID = LPVOID = ctypes.c_void_p
+
+# WPARAM is defined as UINT_PTR (unsigned type)
+# LPARAM is defined as LONG_PTR (signed type)
+if ctypes.sizeof(ctypes.c_long) == ctypes.sizeof(ctypes.c_void_p):
+ WPARAM = ctypes.c_ulong
+ LPARAM = ctypes.c_long
+elif ctypes.sizeof(ctypes.c_longlong) == ctypes.sizeof(ctypes.c_void_p):
+ WPARAM = ctypes.c_ulonglong
+ LPARAM = ctypes.c_longlong
+
+ATOM = WORD
+LANGID = WORD
+
+COLORREF = DWORD
+LGRPID = DWORD
+LCTYPE = DWORD
+
+LCID = DWORD
+
+################################################################
+# HANDLE types
+HANDLE = ctypes.c_void_p # in the header files: void *
+
+HACCEL = HANDLE
+HBITMAP = HANDLE
+HBRUSH = HANDLE
+HCOLORSPACE = HANDLE
+HDC = HANDLE
+HDESK = HANDLE
+HDWP = HANDLE
+HENHMETAFILE = HANDLE
+HFONT = HANDLE
+HGDIOBJ = HANDLE
+HGLOBAL = HANDLE
+HHOOK = HANDLE
+HICON = HANDLE
+HINSTANCE = HANDLE
+HKEY = HANDLE
+HKL = HANDLE
+HLOCAL = HANDLE
+HMENU = HANDLE
+HMETAFILE = HANDLE
+HMODULE = HANDLE
+HMONITOR = HANDLE
+HPALETTE = HANDLE
+HPEN = HANDLE
+HRGN = HANDLE
+HRSRC = HANDLE
+HSTR = HANDLE
+HTASK = HANDLE
+HWINSTA = HANDLE
+HWND = HANDLE
+SC_HANDLE = HANDLE
+SERVICE_STATUS_HANDLE = HANDLE
+
+################################################################
+# Some important structure definitions
+
+class RECT(ctypes.Structure):
+ _fields_ = [("left", LONG),
+ ("top", LONG),
+ ("right", LONG),
+ ("bottom", LONG)]
+tagRECT = _RECTL = RECTL = RECT
+
+class _SMALL_RECT(ctypes.Structure):
+ _fields_ = [('Left', SHORT),
+ ('Top', SHORT),
+ ('Right', SHORT),
+ ('Bottom', SHORT)]
+SMALL_RECT = _SMALL_RECT
+
+class _COORD(ctypes.Structure):
+ _fields_ = [('X', SHORT),
+ ('Y', SHORT)]
+
+class POINT(ctypes.Structure):
+ _fields_ = [("x", LONG),
+ ("y", LONG)]
+tagPOINT = _POINTL = POINTL = POINT
+
+class SIZE(ctypes.Structure):
+ _fields_ = [("cx", LONG),
+ ("cy", LONG)]
+tagSIZE = SIZEL = SIZE
+
+def RGB(red, green, blue):
+ return red + (green << 8) + (blue << 16)
+
+class FILETIME(ctypes.Structure):
+ _fields_ = [("dwLowDateTime", DWORD),
+ ("dwHighDateTime", DWORD)]
+_FILETIME = FILETIME
+
+class MSG(ctypes.Structure):
+ _fields_ = [("hWnd", HWND),
+ ("message", UINT),
+ ("wParam", WPARAM),
+ ("lParam", LPARAM),
+ ("time", DWORD),
+ ("pt", POINT)]
+tagMSG = MSG
+MAX_PATH = 260
+
+class WIN32_FIND_DATAA(ctypes.Structure):
+ _fields_ = [("dwFileAttributes", DWORD),
+ ("ftCreationTime", FILETIME),
+ ("ftLastAccessTime", FILETIME),
+ ("ftLastWriteTime", FILETIME),
+ ("nFileSizeHigh", DWORD),
+ ("nFileSizeLow", DWORD),
+ ("dwReserved0", DWORD),
+ ("dwReserved1", DWORD),
+ ("cFileName", CHAR * MAX_PATH),
+ ("cAlternateFileName", CHAR * 14)]
+
+class WIN32_FIND_DATAW(ctypes.Structure):
+ _fields_ = [("dwFileAttributes", DWORD),
+ ("ftCreationTime", FILETIME),
+ ("ftLastAccessTime", FILETIME),
+ ("ftLastWriteTime", FILETIME),
+ ("nFileSizeHigh", DWORD),
+ ("nFileSizeLow", DWORD),
+ ("dwReserved0", DWORD),
+ ("dwReserved1", DWORD),
+ ("cFileName", WCHAR * MAX_PATH),
+ ("cAlternateFileName", WCHAR * 14)]
+
+################################################################
+# Pointer types
+
+LPBOOL = PBOOL = ctypes.POINTER(BOOL)
+PBOOLEAN = ctypes.POINTER(BOOLEAN)
+LPBYTE = PBYTE = ctypes.POINTER(BYTE)
+PCHAR = ctypes.POINTER(CHAR)
+LPCOLORREF = ctypes.POINTER(COLORREF)
+LPDWORD = PDWORD = ctypes.POINTER(DWORD)
+LPFILETIME = PFILETIME = ctypes.POINTER(FILETIME)
+PFLOAT = ctypes.POINTER(FLOAT)
+LPHANDLE = PHANDLE = ctypes.POINTER(HANDLE)
+PHKEY = ctypes.POINTER(HKEY)
+LPHKL = ctypes.POINTER(HKL)
+LPINT = PINT = ctypes.POINTER(INT)
+PLARGE_INTEGER = ctypes.POINTER(LARGE_INTEGER)
+PLCID = ctypes.POINTER(LCID)
+LPLONG = PLONG = ctypes.POINTER(LONG)
+LPMSG = PMSG = ctypes.POINTER(MSG)
+LPPOINT = PPOINT = ctypes.POINTER(POINT)
+PPOINTL = ctypes.POINTER(POINTL)
+LPRECT = PRECT = ctypes.POINTER(RECT)
+LPRECTL = PRECTL = ctypes.POINTER(RECTL)
+LPSC_HANDLE = ctypes.POINTER(SC_HANDLE)
+PSHORT = ctypes.POINTER(SHORT)
+LPSIZE = PSIZE = ctypes.POINTER(SIZE)
+LPSIZEL = PSIZEL = ctypes.POINTER(SIZEL)
+PSMALL_RECT = ctypes.POINTER(SMALL_RECT)
+LPUINT = PUINT = ctypes.POINTER(UINT)
+PULARGE_INTEGER = ctypes.POINTER(ULARGE_INTEGER)
+PULONG = ctypes.POINTER(ULONG)
+PUSHORT = ctypes.POINTER(USHORT)
+PWCHAR = ctypes.POINTER(WCHAR)
+LPWIN32_FIND_DATAA = PWIN32_FIND_DATAA = ctypes.POINTER(WIN32_FIND_DATAA)
+LPWIN32_FIND_DATAW = PWIN32_FIND_DATAW = ctypes.POINTER(WIN32_FIND_DATAW)
+LPWORD = PWORD = ctypes.POINTER(WORD)
diff --git a/dist/lib/curses/__init__.py b/dist/lib/curses/__init__.py
new file mode 100644
index 0000000..24ff3ca
--- /dev/null
+++ b/dist/lib/curses/__init__.py
@@ -0,0 +1,113 @@
+"""curses
+
+The main package for curses support for Python. Normally used by importing
+the package, and perhaps a particular module inside it.
+
+ import curses
+ from curses import textpad
+ curses.initscr()
+ ...
+
+"""
+
+from _curses import *
+import os as _os
+import sys as _sys
+
+# Some constants, most notably the ACS_* ones, are only added to the C
+# _curses module's dictionary after initscr() is called. (Some
+# versions of SGI's curses don't define values for those constants
+# until initscr() has been called.) This wrapper function calls the
+# underlying C initscr(), and then copies the constants from the
+# _curses module to the curses package's dictionary. Don't do 'from
+# curses import *' if you'll be needing the ACS_* constants.
+
+def initscr():
+ import _curses, curses
+ # we call setupterm() here because it raises an error
+ # instead of calling exit() in error cases.
+ setupterm(term=_os.environ.get("TERM", "unknown"),
+ fd=_sys.__stdout__.fileno())
+ stdscr = _curses.initscr()
+ for key, value in _curses.__dict__.items():
+ if key[0:4] == 'ACS_' or key in ('LINES', 'COLS'):
+ setattr(curses, key, value)
+
+ return stdscr
+
+# This is a similar wrapper for start_color(), which adds the COLORS and
+# COLOR_PAIRS variables which are only available after start_color() is
+# called.
+
+def start_color():
+ import _curses, curses
+ retval = _curses.start_color()
+ if hasattr(_curses, 'COLORS'):
+ curses.COLORS = _curses.COLORS
+ if hasattr(_curses, 'COLOR_PAIRS'):
+ curses.COLOR_PAIRS = _curses.COLOR_PAIRS
+ return retval
+
+# Import Python has_key() implementation if _curses doesn't contain has_key()
+
+try:
+ has_key
+except NameError:
+ from .has_key import has_key
+
+# Wrapper for the entire curses-based application. Runs a function which
+# should be the rest of your curses-based application. If the application
+# raises an exception, wrapper() will restore the terminal to a sane state so
+# you can read the resulting traceback.
+
+def wrapper(*args, **kwds):
+ """Wrapper function that initializes curses and calls another function,
+ restoring normal keyboard/screen behavior on error.
+ The callable object 'func' is then passed the main window 'stdscr'
+ as its first argument, followed by any other arguments passed to
+ wrapper().
+ """
+
+ if args:
+ func, *args = args
+ elif 'func' in kwds:
+ func = kwds.pop('func')
+ import warnings
+ warnings.warn("Passing 'func' as keyword argument is deprecated",
+ DeprecationWarning, stacklevel=2)
+ else:
+ raise TypeError('wrapper expected at least 1 positional argument, '
+ 'got %d' % len(args))
+
+ try:
+ # Initialize curses
+ stdscr = initscr()
+
+ # Turn off echoing of keys, and enter cbreak mode,
+ # where no buffering is performed on keyboard input
+ noecho()
+ cbreak()
+
+ # In keypad mode, escape sequences for special keys
+ # (like the cursor keys) will be interpreted and
+ # a special value like curses.KEY_LEFT will be returned
+ stdscr.keypad(1)
+
+ # Start color, too. Harmless if the terminal doesn't have
+ # color; user can test with has_color() later on. The try/catch
+ # works around a minor bit of over-conscientiousness in the curses
+ # module -- the error return from C start_color() is ignorable.
+ try:
+ start_color()
+ except:
+ pass
+
+ return func(stdscr, *args, **kwds)
+ finally:
+ # Set everything back to normal
+ if 'stdscr' in locals():
+ stdscr.keypad(0)
+ echo()
+ nocbreak()
+ endwin()
+wrapper.__text_signature__ = '(func, /, *args, **kwds)'
diff --git a/dist/lib/curses/__pycache__/__init__.cpython-38.opt-1.pyc b/dist/lib/curses/__pycache__/__init__.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..8a865d6
Binary files /dev/null and b/dist/lib/curses/__pycache__/__init__.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/curses/__pycache__/ascii.cpython-38.opt-1.pyc b/dist/lib/curses/__pycache__/ascii.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..54fec8e
Binary files /dev/null and b/dist/lib/curses/__pycache__/ascii.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/curses/__pycache__/has_key.cpython-38.opt-1.pyc b/dist/lib/curses/__pycache__/has_key.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..bc07fac
Binary files /dev/null and b/dist/lib/curses/__pycache__/has_key.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/curses/__pycache__/panel.cpython-38.opt-1.pyc b/dist/lib/curses/__pycache__/panel.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..261c561
Binary files /dev/null and b/dist/lib/curses/__pycache__/panel.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/curses/__pycache__/textpad.cpython-38.opt-1.pyc b/dist/lib/curses/__pycache__/textpad.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..0d34d65
Binary files /dev/null and b/dist/lib/curses/__pycache__/textpad.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/curses/ascii.py b/dist/lib/curses/ascii.py
new file mode 100644
index 0000000..5b243be
--- /dev/null
+++ b/dist/lib/curses/ascii.py
@@ -0,0 +1,99 @@
+"""Constants and membership tests for ASCII characters"""
+
+NUL = 0x00 # ^@
+SOH = 0x01 # ^A
+STX = 0x02 # ^B
+ETX = 0x03 # ^C
+EOT = 0x04 # ^D
+ENQ = 0x05 # ^E
+ACK = 0x06 # ^F
+BEL = 0x07 # ^G
+BS = 0x08 # ^H
+TAB = 0x09 # ^I
+HT = 0x09 # ^I
+LF = 0x0a # ^J
+NL = 0x0a # ^J
+VT = 0x0b # ^K
+FF = 0x0c # ^L
+CR = 0x0d # ^M
+SO = 0x0e # ^N
+SI = 0x0f # ^O
+DLE = 0x10 # ^P
+DC1 = 0x11 # ^Q
+DC2 = 0x12 # ^R
+DC3 = 0x13 # ^S
+DC4 = 0x14 # ^T
+NAK = 0x15 # ^U
+SYN = 0x16 # ^V
+ETB = 0x17 # ^W
+CAN = 0x18 # ^X
+EM = 0x19 # ^Y
+SUB = 0x1a # ^Z
+ESC = 0x1b # ^[
+FS = 0x1c # ^\
+GS = 0x1d # ^]
+RS = 0x1e # ^^
+US = 0x1f # ^_
+SP = 0x20 # space
+DEL = 0x7f # delete
+
+controlnames = [
+"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
+"BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
+"DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
+"CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",
+"SP"
+]
+
+def _ctoi(c):
+ if type(c) == type(""):
+ return ord(c)
+ else:
+ return c
+
+def isalnum(c): return isalpha(c) or isdigit(c)
+def isalpha(c): return isupper(c) or islower(c)
+def isascii(c): return 0 <= _ctoi(c) <= 127 # ?
+def isblank(c): return _ctoi(c) in (9, 32)
+def iscntrl(c): return 0 <= _ctoi(c) <= 31 or _ctoi(c) == 127
+def isdigit(c): return 48 <= _ctoi(c) <= 57
+def isgraph(c): return 33 <= _ctoi(c) <= 126
+def islower(c): return 97 <= _ctoi(c) <= 122
+def isprint(c): return 32 <= _ctoi(c) <= 126
+def ispunct(c): return isgraph(c) and not isalnum(c)
+def isspace(c): return _ctoi(c) in (9, 10, 11, 12, 13, 32)
+def isupper(c): return 65 <= _ctoi(c) <= 90
+def isxdigit(c): return isdigit(c) or \
+ (65 <= _ctoi(c) <= 70) or (97 <= _ctoi(c) <= 102)
+def isctrl(c): return 0 <= _ctoi(c) < 32
+def ismeta(c): return _ctoi(c) > 127
+
+def ascii(c):
+ if type(c) == type(""):
+ return chr(_ctoi(c) & 0x7f)
+ else:
+ return _ctoi(c) & 0x7f
+
+def ctrl(c):
+ if type(c) == type(""):
+ return chr(_ctoi(c) & 0x1f)
+ else:
+ return _ctoi(c) & 0x1f
+
+def alt(c):
+ if type(c) == type(""):
+ return chr(_ctoi(c) | 0x80)
+ else:
+ return _ctoi(c) | 0x80
+
+def unctrl(c):
+ bits = _ctoi(c)
+ if bits == 0x7f:
+ rep = "^?"
+ elif isprint(bits & 0x7f):
+ rep = chr(bits & 0x7f)
+ else:
+ rep = "^" + chr(((bits & 0x7f) | 0x20) + 0x20)
+ if bits & 0x80:
+ return "!" + rep
+ return rep
diff --git a/dist/lib/curses/has_key.py b/dist/lib/curses/has_key.py
new file mode 100644
index 0000000..4e37b48
--- /dev/null
+++ b/dist/lib/curses/has_key.py
@@ -0,0 +1,192 @@
+
+#
+# Emulation of has_key() function for platforms that don't use ncurses
+#
+
+import _curses
+
+# Table mapping curses keys to the terminfo capability name
+
+_capability_names = {
+ _curses.KEY_A1: 'ka1',
+ _curses.KEY_A3: 'ka3',
+ _curses.KEY_B2: 'kb2',
+ _curses.KEY_BACKSPACE: 'kbs',
+ _curses.KEY_BEG: 'kbeg',
+ _curses.KEY_BTAB: 'kcbt',
+ _curses.KEY_C1: 'kc1',
+ _curses.KEY_C3: 'kc3',
+ _curses.KEY_CANCEL: 'kcan',
+ _curses.KEY_CATAB: 'ktbc',
+ _curses.KEY_CLEAR: 'kclr',
+ _curses.KEY_CLOSE: 'kclo',
+ _curses.KEY_COMMAND: 'kcmd',
+ _curses.KEY_COPY: 'kcpy',
+ _curses.KEY_CREATE: 'kcrt',
+ _curses.KEY_CTAB: 'kctab',
+ _curses.KEY_DC: 'kdch1',
+ _curses.KEY_DL: 'kdl1',
+ _curses.KEY_DOWN: 'kcud1',
+ _curses.KEY_EIC: 'krmir',
+ _curses.KEY_END: 'kend',
+ _curses.KEY_ENTER: 'kent',
+ _curses.KEY_EOL: 'kel',
+ _curses.KEY_EOS: 'ked',
+ _curses.KEY_EXIT: 'kext',
+ _curses.KEY_F0: 'kf0',
+ _curses.KEY_F1: 'kf1',
+ _curses.KEY_F10: 'kf10',
+ _curses.KEY_F11: 'kf11',
+ _curses.KEY_F12: 'kf12',
+ _curses.KEY_F13: 'kf13',
+ _curses.KEY_F14: 'kf14',
+ _curses.KEY_F15: 'kf15',
+ _curses.KEY_F16: 'kf16',
+ _curses.KEY_F17: 'kf17',
+ _curses.KEY_F18: 'kf18',
+ _curses.KEY_F19: 'kf19',
+ _curses.KEY_F2: 'kf2',
+ _curses.KEY_F20: 'kf20',
+ _curses.KEY_F21: 'kf21',
+ _curses.KEY_F22: 'kf22',
+ _curses.KEY_F23: 'kf23',
+ _curses.KEY_F24: 'kf24',
+ _curses.KEY_F25: 'kf25',
+ _curses.KEY_F26: 'kf26',
+ _curses.KEY_F27: 'kf27',
+ _curses.KEY_F28: 'kf28',
+ _curses.KEY_F29: 'kf29',
+ _curses.KEY_F3: 'kf3',
+ _curses.KEY_F30: 'kf30',
+ _curses.KEY_F31: 'kf31',
+ _curses.KEY_F32: 'kf32',
+ _curses.KEY_F33: 'kf33',
+ _curses.KEY_F34: 'kf34',
+ _curses.KEY_F35: 'kf35',
+ _curses.KEY_F36: 'kf36',
+ _curses.KEY_F37: 'kf37',
+ _curses.KEY_F38: 'kf38',
+ _curses.KEY_F39: 'kf39',
+ _curses.KEY_F4: 'kf4',
+ _curses.KEY_F40: 'kf40',
+ _curses.KEY_F41: 'kf41',
+ _curses.KEY_F42: 'kf42',
+ _curses.KEY_F43: 'kf43',
+ _curses.KEY_F44: 'kf44',
+ _curses.KEY_F45: 'kf45',
+ _curses.KEY_F46: 'kf46',
+ _curses.KEY_F47: 'kf47',
+ _curses.KEY_F48: 'kf48',
+ _curses.KEY_F49: 'kf49',
+ _curses.KEY_F5: 'kf5',
+ _curses.KEY_F50: 'kf50',
+ _curses.KEY_F51: 'kf51',
+ _curses.KEY_F52: 'kf52',
+ _curses.KEY_F53: 'kf53',
+ _curses.KEY_F54: 'kf54',
+ _curses.KEY_F55: 'kf55',
+ _curses.KEY_F56: 'kf56',
+ _curses.KEY_F57: 'kf57',
+ _curses.KEY_F58: 'kf58',
+ _curses.KEY_F59: 'kf59',
+ _curses.KEY_F6: 'kf6',
+ _curses.KEY_F60: 'kf60',
+ _curses.KEY_F61: 'kf61',
+ _curses.KEY_F62: 'kf62',
+ _curses.KEY_F63: 'kf63',
+ _curses.KEY_F7: 'kf7',
+ _curses.KEY_F8: 'kf8',
+ _curses.KEY_F9: 'kf9',
+ _curses.KEY_FIND: 'kfnd',
+ _curses.KEY_HELP: 'khlp',
+ _curses.KEY_HOME: 'khome',
+ _curses.KEY_IC: 'kich1',
+ _curses.KEY_IL: 'kil1',
+ _curses.KEY_LEFT: 'kcub1',
+ _curses.KEY_LL: 'kll',
+ _curses.KEY_MARK: 'kmrk',
+ _curses.KEY_MESSAGE: 'kmsg',
+ _curses.KEY_MOVE: 'kmov',
+ _curses.KEY_NEXT: 'knxt',
+ _curses.KEY_NPAGE: 'knp',
+ _curses.KEY_OPEN: 'kopn',
+ _curses.KEY_OPTIONS: 'kopt',
+ _curses.KEY_PPAGE: 'kpp',
+ _curses.KEY_PREVIOUS: 'kprv',
+ _curses.KEY_PRINT: 'kprt',
+ _curses.KEY_REDO: 'krdo',
+ _curses.KEY_REFERENCE: 'kref',
+ _curses.KEY_REFRESH: 'krfr',
+ _curses.KEY_REPLACE: 'krpl',
+ _curses.KEY_RESTART: 'krst',
+ _curses.KEY_RESUME: 'kres',
+ _curses.KEY_RIGHT: 'kcuf1',
+ _curses.KEY_SAVE: 'ksav',
+ _curses.KEY_SBEG: 'kBEG',
+ _curses.KEY_SCANCEL: 'kCAN',
+ _curses.KEY_SCOMMAND: 'kCMD',
+ _curses.KEY_SCOPY: 'kCPY',
+ _curses.KEY_SCREATE: 'kCRT',
+ _curses.KEY_SDC: 'kDC',
+ _curses.KEY_SDL: 'kDL',
+ _curses.KEY_SELECT: 'kslt',
+ _curses.KEY_SEND: 'kEND',
+ _curses.KEY_SEOL: 'kEOL',
+ _curses.KEY_SEXIT: 'kEXT',
+ _curses.KEY_SF: 'kind',
+ _curses.KEY_SFIND: 'kFND',
+ _curses.KEY_SHELP: 'kHLP',
+ _curses.KEY_SHOME: 'kHOM',
+ _curses.KEY_SIC: 'kIC',
+ _curses.KEY_SLEFT: 'kLFT',
+ _curses.KEY_SMESSAGE: 'kMSG',
+ _curses.KEY_SMOVE: 'kMOV',
+ _curses.KEY_SNEXT: 'kNXT',
+ _curses.KEY_SOPTIONS: 'kOPT',
+ _curses.KEY_SPREVIOUS: 'kPRV',
+ _curses.KEY_SPRINT: 'kPRT',
+ _curses.KEY_SR: 'kri',
+ _curses.KEY_SREDO: 'kRDO',
+ _curses.KEY_SREPLACE: 'kRPL',
+ _curses.KEY_SRIGHT: 'kRIT',
+ _curses.KEY_SRSUME: 'kRES',
+ _curses.KEY_SSAVE: 'kSAV',
+ _curses.KEY_SSUSPEND: 'kSPD',
+ _curses.KEY_STAB: 'khts',
+ _curses.KEY_SUNDO: 'kUND',
+ _curses.KEY_SUSPEND: 'kspd',
+ _curses.KEY_UNDO: 'kund',
+ _curses.KEY_UP: 'kcuu1'
+ }
+
+def has_key(ch):
+ if isinstance(ch, str):
+ ch = ord(ch)
+
+ # Figure out the correct capability name for the keycode.
+ capability_name = _capability_names.get(ch)
+ if capability_name is None:
+ return False
+
+ #Check the current terminal description for that capability;
+ #if present, return true, else return false.
+ if _curses.tigetstr( capability_name ):
+ return True
+ else:
+ return False
+
+if __name__ == '__main__':
+ # Compare the output of this implementation and the ncurses has_key,
+ # on platforms where has_key is already available
+ try:
+ L = []
+ _curses.initscr()
+ for key in _capability_names.keys():
+ system = _curses.has_key(key)
+ python = has_key(key)
+ if system != python:
+ L.append( 'Mismatch for key %s, system=%i, Python=%i'
+ % (_curses.keyname( key ), system, python) )
+ finally:
+ _curses.endwin()
+ for i in L: print(i)
diff --git a/dist/lib/curses/panel.py b/dist/lib/curses/panel.py
new file mode 100644
index 0000000..067afef
--- /dev/null
+++ b/dist/lib/curses/panel.py
@@ -0,0 +1,6 @@
+"""curses.panel
+
+Module for using panels with curses.
+"""
+
+from _curses_panel import *
diff --git a/dist/lib/curses/textpad.py b/dist/lib/curses/textpad.py
new file mode 100644
index 0000000..2079953
--- /dev/null
+++ b/dist/lib/curses/textpad.py
@@ -0,0 +1,201 @@
+"""Simple textbox editing widget with Emacs-like keybindings."""
+
+import curses
+import curses.ascii
+
+def rectangle(win, uly, ulx, lry, lrx):
+ """Draw a rectangle with corners at the provided upper-left
+ and lower-right coordinates.
+ """
+ win.vline(uly+1, ulx, curses.ACS_VLINE, lry - uly - 1)
+ win.hline(uly, ulx+1, curses.ACS_HLINE, lrx - ulx - 1)
+ win.hline(lry, ulx+1, curses.ACS_HLINE, lrx - ulx - 1)
+ win.vline(uly+1, lrx, curses.ACS_VLINE, lry - uly - 1)
+ win.addch(uly, ulx, curses.ACS_ULCORNER)
+ win.addch(uly, lrx, curses.ACS_URCORNER)
+ win.addch(lry, lrx, curses.ACS_LRCORNER)
+ win.addch(lry, ulx, curses.ACS_LLCORNER)
+
+class Textbox:
+ """Editing widget using the interior of a window object.
+ Supports the following Emacs-like key bindings:
+
+ Ctrl-A Go to left edge of window.
+ Ctrl-B Cursor left, wrapping to previous line if appropriate.
+ Ctrl-D Delete character under cursor.
+ Ctrl-E Go to right edge (stripspaces off) or end of line (stripspaces on).
+ Ctrl-F Cursor right, wrapping to next line when appropriate.
+ Ctrl-G Terminate, returning the window contents.
+ Ctrl-H Delete character backward.
+ Ctrl-J Terminate if the window is 1 line, otherwise insert newline.
+ Ctrl-K If line is blank, delete it, otherwise clear to end of line.
+ Ctrl-L Refresh screen.
+ Ctrl-N Cursor down; move down one line.
+ Ctrl-O Insert a blank line at cursor location.
+ Ctrl-P Cursor up; move up one line.
+
+ Move operations do nothing if the cursor is at an edge where the movement
+ is not possible. The following synonyms are supported where possible:
+
+ KEY_LEFT = Ctrl-B, KEY_RIGHT = Ctrl-F, KEY_UP = Ctrl-P, KEY_DOWN = Ctrl-N
+ KEY_BACKSPACE = Ctrl-h
+ """
+ def __init__(self, win, insert_mode=False):
+ self.win = win
+ self.insert_mode = insert_mode
+ self._update_max_yx()
+ self.stripspaces = 1
+ self.lastcmd = None
+ win.keypad(1)
+
+ def _update_max_yx(self):
+ maxy, maxx = self.win.getmaxyx()
+ self.maxy = maxy - 1
+ self.maxx = maxx - 1
+
+ def _end_of_line(self, y):
+ """Go to the location of the first blank on the given line,
+ returning the index of the last non-blank character."""
+ self._update_max_yx()
+ last = self.maxx
+ while True:
+ if curses.ascii.ascii(self.win.inch(y, last)) != curses.ascii.SP:
+ last = min(self.maxx, last+1)
+ break
+ elif last == 0:
+ break
+ last = last - 1
+ return last
+
+ def _insert_printable_char(self, ch):
+ self._update_max_yx()
+ (y, x) = self.win.getyx()
+ backyx = None
+ while y < self.maxy or x < self.maxx:
+ if self.insert_mode:
+ oldch = self.win.inch()
+ # The try-catch ignores the error we trigger from some curses
+ # versions by trying to write into the lowest-rightmost spot
+ # in the window.
+ try:
+ self.win.addch(ch)
+ except curses.error:
+ pass
+ if not self.insert_mode or not curses.ascii.isprint(oldch):
+ break
+ ch = oldch
+ (y, x) = self.win.getyx()
+ # Remember where to put the cursor back since we are in insert_mode
+ if backyx is None:
+ backyx = y, x
+
+ if backyx is not None:
+ self.win.move(*backyx)
+
+ def do_command(self, ch):
+ "Process a single editing command."
+ self._update_max_yx()
+ (y, x) = self.win.getyx()
+ self.lastcmd = ch
+ if curses.ascii.isprint(ch):
+ if y < self.maxy or x < self.maxx:
+ self._insert_printable_char(ch)
+ elif ch == curses.ascii.SOH: # ^a
+ self.win.move(y, 0)
+ elif ch in (curses.ascii.STX,curses.KEY_LEFT, curses.ascii.BS,curses.KEY_BACKSPACE):
+ if x > 0:
+ self.win.move(y, x-1)
+ elif y == 0:
+ pass
+ elif self.stripspaces:
+ self.win.move(y-1, self._end_of_line(y-1))
+ else:
+ self.win.move(y-1, self.maxx)
+ if ch in (curses.ascii.BS, curses.KEY_BACKSPACE):
+ self.win.delch()
+ elif ch == curses.ascii.EOT: # ^d
+ self.win.delch()
+ elif ch == curses.ascii.ENQ: # ^e
+ if self.stripspaces:
+ self.win.move(y, self._end_of_line(y))
+ else:
+ self.win.move(y, self.maxx)
+ elif ch in (curses.ascii.ACK, curses.KEY_RIGHT): # ^f
+ if x < self.maxx:
+ self.win.move(y, x+1)
+ elif y == self.maxy:
+ pass
+ else:
+ self.win.move(y+1, 0)
+ elif ch == curses.ascii.BEL: # ^g
+ return 0
+ elif ch == curses.ascii.NL: # ^j
+ if self.maxy == 0:
+ return 0
+ elif y < self.maxy:
+ self.win.move(y+1, 0)
+ elif ch == curses.ascii.VT: # ^k
+ if x == 0 and self._end_of_line(y) == 0:
+ self.win.deleteln()
+ else:
+ # first undo the effect of self._end_of_line
+ self.win.move(y, x)
+ self.win.clrtoeol()
+ elif ch == curses.ascii.FF: # ^l
+ self.win.refresh()
+ elif ch in (curses.ascii.SO, curses.KEY_DOWN): # ^n
+ if y < self.maxy:
+ self.win.move(y+1, x)
+ if x > self._end_of_line(y+1):
+ self.win.move(y+1, self._end_of_line(y+1))
+ elif ch == curses.ascii.SI: # ^o
+ self.win.insertln()
+ elif ch in (curses.ascii.DLE, curses.KEY_UP): # ^p
+ if y > 0:
+ self.win.move(y-1, x)
+ if x > self._end_of_line(y-1):
+ self.win.move(y-1, self._end_of_line(y-1))
+ return 1
+
+ def gather(self):
+ "Collect and return the contents of the window."
+ result = ""
+ self._update_max_yx()
+ for y in range(self.maxy+1):
+ self.win.move(y, 0)
+ stop = self._end_of_line(y)
+ if stop == 0 and self.stripspaces:
+ continue
+ for x in range(self.maxx+1):
+ if self.stripspaces and x > stop:
+ break
+ result = result + chr(curses.ascii.ascii(self.win.inch(y, x)))
+ if self.maxy > 0:
+ result = result + "\n"
+ return result
+
+ def edit(self, validate=None):
+ "Edit in the widget window and collect the results."
+ while 1:
+ ch = self.win.getch()
+ if validate:
+ ch = validate(ch)
+ if not ch:
+ continue
+ if not self.do_command(ch):
+ break
+ self.win.refresh()
+ return self.gather()
+
+if __name__ == '__main__':
+ def test_editbox(stdscr):
+ ncols, nlines = 9, 4
+ uly, ulx = 15, 20
+ stdscr.addstr(uly-2, ulx, "Use Ctrl-G to end editing.")
+ win = curses.newwin(nlines, ncols, uly, ulx)
+ rectangle(stdscr, uly-1, ulx-1, uly + nlines, ulx + ncols)
+ stdscr.refresh()
+ return Textbox(win).edit()
+
+ str = curses.wrapper(test_editbox)
+ print('Contents of text box:', repr(str))
diff --git a/dist/lib/dataclasses.py b/dist/lib/dataclasses.py
new file mode 100644
index 0000000..10bb33e
--- /dev/null
+++ b/dist/lib/dataclasses.py
@@ -0,0 +1,1294 @@
+import re
+import sys
+import copy
+import types
+import inspect
+import keyword
+import builtins
+import functools
+import _thread
+
+
+__all__ = ['dataclass',
+ 'field',
+ 'Field',
+ 'FrozenInstanceError',
+ 'InitVar',
+ 'MISSING',
+
+ # Helper functions.
+ 'fields',
+ 'asdict',
+ 'astuple',
+ 'make_dataclass',
+ 'replace',
+ 'is_dataclass',
+ ]
+
+# Conditions for adding methods. The boxes indicate what action the
+# dataclass decorator takes. For all of these tables, when I talk
+# about init=, repr=, eq=, order=, unsafe_hash=, or frozen=, I'm
+# referring to the arguments to the @dataclass decorator. When
+# checking if a dunder method already exists, I mean check for an
+# entry in the class's __dict__. I never check to see if an attribute
+# is defined in a base class.
+
+# Key:
+# +=========+=========================================+
+# + Value | Meaning |
+# +=========+=========================================+
+# | | No action: no method is added. |
+# +---------+-----------------------------------------+
+# | add | Generated method is added. |
+# +---------+-----------------------------------------+
+# | raise | TypeError is raised. |
+# +---------+-----------------------------------------+
+# | None | Attribute is set to None. |
+# +=========+=========================================+
+
+# __init__
+#
+# +--- init= parameter
+# |
+# v | | |
+# | no | yes | <--- class has __init__ in __dict__?
+# +=======+=======+=======+
+# | False | | |
+# +-------+-------+-------+
+# | True | add | | <- the default
+# +=======+=======+=======+
+
+# __repr__
+#
+# +--- repr= parameter
+# |
+# v | | |
+# | no | yes | <--- class has __repr__ in __dict__?
+# +=======+=======+=======+
+# | False | | |
+# +-------+-------+-------+
+# | True | add | | <- the default
+# +=======+=======+=======+
+
+
+# __setattr__
+# __delattr__
+#
+# +--- frozen= parameter
+# |
+# v | | |
+# | no | yes | <--- class has __setattr__ or __delattr__ in __dict__?
+# +=======+=======+=======+
+# | False | | | <- the default
+# +-------+-------+-------+
+# | True | add | raise |
+# +=======+=======+=======+
+# Raise because not adding these methods would break the "frozen-ness"
+# of the class.
+
+# __eq__
+#
+# +--- eq= parameter
+# |
+# v | | |
+# | no | yes | <--- class has __eq__ in __dict__?
+# +=======+=======+=======+
+# | False | | |
+# +-------+-------+-------+
+# | True | add | | <- the default
+# +=======+=======+=======+
+
+# __lt__
+# __le__
+# __gt__
+# __ge__
+#
+# +--- order= parameter
+# |
+# v | | |
+# | no | yes | <--- class has any comparison method in __dict__?
+# +=======+=======+=======+
+# | False | | | <- the default
+# +-------+-------+-------+
+# | True | add | raise |
+# +=======+=======+=======+
+# Raise because to allow this case would interfere with using
+# functools.total_ordering.
+
+# __hash__
+
+# +------------------- unsafe_hash= parameter
+# | +----------- eq= parameter
+# | | +--- frozen= parameter
+# | | |
+# v v v | | |
+# | no | yes | <--- class has explicitly defined __hash__
+# +=======+=======+=======+========+========+
+# | False | False | False | | | No __eq__, use the base class __hash__
+# +-------+-------+-------+--------+--------+
+# | False | False | True | | | No __eq__, use the base class __hash__
+# +-------+-------+-------+--------+--------+
+# | False | True | False | None | | <-- the default, not hashable
+# +-------+-------+-------+--------+--------+
+# | False | True | True | add | | Frozen, so hashable, allows override
+# +-------+-------+-------+--------+--------+
+# | True | False | False | add | raise | Has no __eq__, but hashable
+# +-------+-------+-------+--------+--------+
+# | True | False | True | add | raise | Has no __eq__, but hashable
+# +-------+-------+-------+--------+--------+
+# | True | True | False | add | raise | Not frozen, but hashable
+# +-------+-------+-------+--------+--------+
+# | True | True | True | add | raise | Frozen, so hashable
+# +=======+=======+=======+========+========+
+# For boxes that are blank, __hash__ is untouched and therefore
+# inherited from the base class. If the base is object, then
+# id-based hashing is used.
+#
+# Note that a class may already have __hash__=None if it specified an
+# __eq__ method in the class body (not one that was created by
+# @dataclass).
+#
+# See _hash_action (below) for a coded version of this table.
+
+
+# Raised when an attempt is made to modify a frozen class.
+class FrozenInstanceError(AttributeError): pass
+
+# A sentinel object for default values to signal that a default
+# factory will be used. This is given a nice repr() which will appear
+# in the function signature of dataclasses' constructors.
+class _HAS_DEFAULT_FACTORY_CLASS:
+ def __repr__(self):
+ return ''
+_HAS_DEFAULT_FACTORY = _HAS_DEFAULT_FACTORY_CLASS()
+
+# A sentinel object to detect if a parameter is supplied or not. Use
+# a class to give it a better repr.
+class _MISSING_TYPE:
+ pass
+MISSING = _MISSING_TYPE()
+
+# Since most per-field metadata will be unused, create an empty
+# read-only proxy that can be shared among all fields.
+_EMPTY_METADATA = types.MappingProxyType({})
+
+# Markers for the various kinds of fields and pseudo-fields.
+class _FIELD_BASE:
+ def __init__(self, name):
+ self.name = name
+ def __repr__(self):
+ return self.name
+_FIELD = _FIELD_BASE('_FIELD')
+_FIELD_CLASSVAR = _FIELD_BASE('_FIELD_CLASSVAR')
+_FIELD_INITVAR = _FIELD_BASE('_FIELD_INITVAR')
+
+# The name of an attribute on the class where we store the Field
+# objects. Also used to check if a class is a Data Class.
+_FIELDS = '__dataclass_fields__'
+
+# The name of an attribute on the class that stores the parameters to
+# @dataclass.
+_PARAMS = '__dataclass_params__'
+
+# The name of the function, that if it exists, is called at the end of
+# __init__.
+_POST_INIT_NAME = '__post_init__'
+
+# String regex that string annotations for ClassVar or InitVar must match.
+# Allows "identifier.identifier[" or "identifier[".
+# https://bugs.python.org/issue33453 for details.
+_MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)')
+
+class _InitVarMeta(type):
+ def __getitem__(self, params):
+ return InitVar(params)
+
+class InitVar(metaclass=_InitVarMeta):
+ __slots__ = ('type', )
+
+ def __init__(self, type):
+ self.type = type
+
+ def __repr__(self):
+ if isinstance(self.type, type):
+ type_name = self.type.__name__
+ else:
+ # typing objects, e.g. List[int]
+ type_name = repr(self.type)
+ return f'dataclasses.InitVar[{type_name}]'
+
+
+# Instances of Field are only ever created from within this module,
+# and only from the field() function, although Field instances are
+# exposed externally as (conceptually) read-only objects.
+#
+# name and type are filled in after the fact, not in __init__.
+# They're not known at the time this class is instantiated, but it's
+# convenient if they're available later.
+#
+# When cls._FIELDS is filled in with a list of Field objects, the name
+# and type fields will have been populated.
+class Field:
+ __slots__ = ('name',
+ 'type',
+ 'default',
+ 'default_factory',
+ 'repr',
+ 'hash',
+ 'init',
+ 'compare',
+ 'metadata',
+ '_field_type', # Private: not to be used by user code.
+ )
+
+ def __init__(self, default, default_factory, init, repr, hash, compare,
+ metadata):
+ self.name = None
+ self.type = None
+ self.default = default
+ self.default_factory = default_factory
+ self.init = init
+ self.repr = repr
+ self.hash = hash
+ self.compare = compare
+ self.metadata = (_EMPTY_METADATA
+ if metadata is None else
+ types.MappingProxyType(metadata))
+ self._field_type = None
+
+ def __repr__(self):
+ return ('Field('
+ f'name={self.name!r},'
+ f'type={self.type!r},'
+ f'default={self.default!r},'
+ f'default_factory={self.default_factory!r},'
+ f'init={self.init!r},'
+ f'repr={self.repr!r},'
+ f'hash={self.hash!r},'
+ f'compare={self.compare!r},'
+ f'metadata={self.metadata!r},'
+ f'_field_type={self._field_type}'
+ ')')
+
+ # This is used to support the PEP 487 __set_name__ protocol in the
+ # case where we're using a field that contains a descriptor as a
+ # default value. For details on __set_name__, see
+ # https://www.python.org/dev/peps/pep-0487/#implementation-details.
+ #
+ # Note that in _process_class, this Field object is overwritten
+ # with the default value, so the end result is a descriptor that
+ # had __set_name__ called on it at the right time.
+ def __set_name__(self, owner, name):
+ func = getattr(type(self.default), '__set_name__', None)
+ if func:
+ # There is a __set_name__ method on the descriptor, call
+ # it.
+ func(self.default, owner, name)
+
+
+class _DataclassParams:
+ __slots__ = ('init',
+ 'repr',
+ 'eq',
+ 'order',
+ 'unsafe_hash',
+ 'frozen',
+ )
+
+ def __init__(self, init, repr, eq, order, unsafe_hash, frozen):
+ self.init = init
+ self.repr = repr
+ self.eq = eq
+ self.order = order
+ self.unsafe_hash = unsafe_hash
+ self.frozen = frozen
+
+ def __repr__(self):
+ return ('_DataclassParams('
+ f'init={self.init!r},'
+ f'repr={self.repr!r},'
+ f'eq={self.eq!r},'
+ f'order={self.order!r},'
+ f'unsafe_hash={self.unsafe_hash!r},'
+ f'frozen={self.frozen!r}'
+ ')')
+
+
+# This function is used instead of exposing Field creation directly,
+# so that a type checker can be told (via overloads) that this is a
+# function whose type depends on its parameters.
+def field(*, default=MISSING, default_factory=MISSING, init=True, repr=True,
+ hash=None, compare=True, metadata=None):
+ """Return an object to identify dataclass fields.
+
+ default is the default value of the field. default_factory is a
+ 0-argument function called to initialize a field's value. If init
+ is True, the field will be a parameter to the class's __init__()
+ function. If repr is True, the field will be included in the
+ object's repr(). If hash is True, the field will be included in
+ the object's hash(). If compare is True, the field will be used
+ in comparison functions. metadata, if specified, must be a
+ mapping which is stored but not otherwise examined by dataclass.
+
+ It is an error to specify both default and default_factory.
+ """
+
+ if default is not MISSING and default_factory is not MISSING:
+ raise ValueError('cannot specify both default and default_factory')
+ return Field(default, default_factory, init, repr, hash, compare,
+ metadata)
+
+
+def _tuple_str(obj_name, fields):
+ # Return a string representing each field of obj_name as a tuple
+ # member. So, if fields is ['x', 'y'] and obj_name is "self",
+ # return "(self.x,self.y)".
+
+ # Special case for the 0-tuple.
+ if not fields:
+ return '()'
+ # Note the trailing comma, needed if this turns out to be a 1-tuple.
+ return f'({",".join([f"{obj_name}.{f.name}" for f in fields])},)'
+
+
+# This function's logic is copied from "recursive_repr" function in
+# reprlib module to avoid dependency.
+def _recursive_repr(user_function):
+ # Decorator to make a repr function return "..." for a recursive
+ # call.
+ repr_running = set()
+
+ @functools.wraps(user_function)
+ def wrapper(self):
+ key = id(self), _thread.get_ident()
+ if key in repr_running:
+ return '...'
+ repr_running.add(key)
+ try:
+ result = user_function(self)
+ finally:
+ repr_running.discard(key)
+ return result
+ return wrapper
+
+
+def _create_fn(name, args, body, *, globals=None, locals=None,
+ return_type=MISSING):
+ # Note that we mutate locals when exec() is called. Caller
+ # beware! The only callers are internal to this module, so no
+ # worries about external callers.
+ if locals is None:
+ locals = {}
+ if 'BUILTINS' not in locals:
+ locals['BUILTINS'] = builtins
+ return_annotation = ''
+ if return_type is not MISSING:
+ locals['_return_type'] = return_type
+ return_annotation = '->_return_type'
+ args = ','.join(args)
+ body = '\n'.join(f' {b}' for b in body)
+
+ # Compute the text of the entire function.
+ txt = f' def {name}({args}){return_annotation}:\n{body}'
+
+ local_vars = ', '.join(locals.keys())
+ txt = f"def __create_fn__({local_vars}):\n{txt}\n return {name}"
+
+ ns = {}
+ exec(txt, globals, ns)
+ return ns['__create_fn__'](**locals)
+
+
+def _field_assign(frozen, name, value, self_name):
+ # If we're a frozen class, then assign to our fields in __init__
+ # via object.__setattr__. Otherwise, just use a simple
+ # assignment.
+ #
+ # self_name is what "self" is called in this function: don't
+ # hard-code "self", since that might be a field name.
+ if frozen:
+ return f'BUILTINS.object.__setattr__({self_name},{name!r},{value})'
+ return f'{self_name}.{name}={value}'
+
+
+def _field_init(f, frozen, globals, self_name):
+ # Return the text of the line in the body of __init__ that will
+ # initialize this field.
+
+ default_name = f'_dflt_{f.name}'
+ if f.default_factory is not MISSING:
+ if f.init:
+ # This field has a default factory. If a parameter is
+ # given, use it. If not, call the factory.
+ globals[default_name] = f.default_factory
+ value = (f'{default_name}() '
+ f'if {f.name} is _HAS_DEFAULT_FACTORY '
+ f'else {f.name}')
+ else:
+ # This is a field that's not in the __init__ params, but
+ # has a default factory function. It needs to be
+ # initialized here by calling the factory function,
+ # because there's no other way to initialize it.
+
+ # For a field initialized with a default=defaultvalue, the
+ # class dict just has the default value
+ # (cls.fieldname=defaultvalue). But that won't work for a
+ # default factory, the factory must be called in __init__
+ # and we must assign that to self.fieldname. We can't
+ # fall back to the class dict's value, both because it's
+ # not set, and because it might be different per-class
+ # (which, after all, is why we have a factory function!).
+
+ globals[default_name] = f.default_factory
+ value = f'{default_name}()'
+ else:
+ # No default factory.
+ if f.init:
+ if f.default is MISSING:
+ # There's no default, just do an assignment.
+ value = f.name
+ elif f.default is not MISSING:
+ globals[default_name] = f.default
+ value = f.name
+ else:
+ # This field does not need initialization. Signify that
+ # to the caller by returning None.
+ return None
+
+ # Only test this now, so that we can create variables for the
+ # default. However, return None to signify that we're not going
+ # to actually do the assignment statement for InitVars.
+ if f._field_type is _FIELD_INITVAR:
+ return None
+
+ # Now, actually generate the field assignment.
+ return _field_assign(frozen, f.name, value, self_name)
+
+
+def _init_param(f):
+ # Return the __init__ parameter string for this field. For
+ # example, the equivalent of 'x:int=3' (except instead of 'int',
+ # reference a variable set to int, and instead of '3', reference a
+ # variable set to 3).
+ if f.default is MISSING and f.default_factory is MISSING:
+ # There's no default, and no default_factory, just output the
+ # variable name and type.
+ default = ''
+ elif f.default is not MISSING:
+ # There's a default, this will be the name that's used to look
+ # it up.
+ default = f'=_dflt_{f.name}'
+ elif f.default_factory is not MISSING:
+ # There's a factory function. Set a marker.
+ default = '=_HAS_DEFAULT_FACTORY'
+ return f'{f.name}:_type_{f.name}{default}'
+
+
+def _init_fn(fields, frozen, has_post_init, self_name, globals):
+ # fields contains both real fields and InitVar pseudo-fields.
+
+ # Make sure we don't have fields without defaults following fields
+ # with defaults. This actually would be caught when exec-ing the
+ # function source code, but catching it here gives a better error
+ # message, and future-proofs us in case we build up the function
+ # using ast.
+ seen_default = False
+ for f in fields:
+ # Only consider fields in the __init__ call.
+ if f.init:
+ if not (f.default is MISSING and f.default_factory is MISSING):
+ seen_default = True
+ elif seen_default:
+ raise TypeError(f'non-default argument {f.name!r} '
+ 'follows default argument')
+
+ locals = {f'_type_{f.name}': f.type for f in fields}
+ locals.update({
+ 'MISSING': MISSING,
+ '_HAS_DEFAULT_FACTORY': _HAS_DEFAULT_FACTORY,
+ })
+
+ body_lines = []
+ for f in fields:
+ line = _field_init(f, frozen, locals, self_name)
+ # line is None means that this field doesn't require
+ # initialization (it's a pseudo-field). Just skip it.
+ if line:
+ body_lines.append(line)
+
+ # Does this class have a post-init function?
+ if has_post_init:
+ params_str = ','.join(f.name for f in fields
+ if f._field_type is _FIELD_INITVAR)
+ body_lines.append(f'{self_name}.{_POST_INIT_NAME}({params_str})')
+
+ # If no body lines, use 'pass'.
+ if not body_lines:
+ body_lines = ['pass']
+
+ return _create_fn('__init__',
+ [self_name] + [_init_param(f) for f in fields if f.init],
+ body_lines,
+ locals=locals,
+ globals=globals,
+ return_type=None)
+
+
+def _repr_fn(fields, globals):
+ fn = _create_fn('__repr__',
+ ('self',),
+ ['return self.__class__.__qualname__ + f"(' +
+ ', '.join([f"{f.name}={{self.{f.name}!r}}"
+ for f in fields]) +
+ ')"'],
+ globals=globals)
+ return _recursive_repr(fn)
+
+
+def _frozen_get_del_attr(cls, fields, globals):
+ locals = {'cls': cls,
+ 'FrozenInstanceError': FrozenInstanceError}
+ if fields:
+ fields_str = '(' + ','.join(repr(f.name) for f in fields) + ',)'
+ else:
+ # Special case for the zero-length tuple.
+ fields_str = '()'
+ return (_create_fn('__setattr__',
+ ('self', 'name', 'value'),
+ (f'if type(self) is cls or name in {fields_str}:',
+ ' raise FrozenInstanceError(f"cannot assign to field {name!r}")',
+ f'super(cls, self).__setattr__(name, value)'),
+ locals=locals,
+ globals=globals),
+ _create_fn('__delattr__',
+ ('self', 'name'),
+ (f'if type(self) is cls or name in {fields_str}:',
+ ' raise FrozenInstanceError(f"cannot delete field {name!r}")',
+ f'super(cls, self).__delattr__(name)'),
+ locals=locals,
+ globals=globals),
+ )
+
+
+def _cmp_fn(name, op, self_tuple, other_tuple, globals):
+ # Create a comparison function. If the fields in the object are
+ # named 'x' and 'y', then self_tuple is the string
+ # '(self.x,self.y)' and other_tuple is the string
+ # '(other.x,other.y)'.
+
+ return _create_fn(name,
+ ('self', 'other'),
+ [ 'if other.__class__ is self.__class__:',
+ f' return {self_tuple}{op}{other_tuple}',
+ 'return NotImplemented'],
+ globals=globals)
+
+
+def _hash_fn(fields, globals):
+ self_tuple = _tuple_str('self', fields)
+ return _create_fn('__hash__',
+ ('self',),
+ [f'return hash({self_tuple})'],
+ globals=globals)
+
+
+def _is_classvar(a_type, typing):
+ # This test uses a typing internal class, but it's the best way to
+ # test if this is a ClassVar.
+ return (a_type is typing.ClassVar
+ or (type(a_type) is typing._GenericAlias
+ and a_type.__origin__ is typing.ClassVar))
+
+
+def _is_initvar(a_type, dataclasses):
+ # The module we're checking against is the module we're
+ # currently in (dataclasses.py).
+ return (a_type is dataclasses.InitVar
+ or type(a_type) is dataclasses.InitVar)
+
+
+def _is_type(annotation, cls, a_module, a_type, is_type_predicate):
+ # Given a type annotation string, does it refer to a_type in
+ # a_module? For example, when checking that annotation denotes a
+ # ClassVar, then a_module is typing, and a_type is
+ # typing.ClassVar.
+
+ # It's possible to look up a_module given a_type, but it involves
+ # looking in sys.modules (again!), and seems like a waste since
+ # the caller already knows a_module.
+
+ # - annotation is a string type annotation
+ # - cls is the class that this annotation was found in
+ # - a_module is the module we want to match
+ # - a_type is the type in that module we want to match
+ # - is_type_predicate is a function called with (obj, a_module)
+ # that determines if obj is of the desired type.
+
+ # Since this test does not do a local namespace lookup (and
+ # instead only a module (global) lookup), there are some things it
+ # gets wrong.
+
+ # With string annotations, cv0 will be detected as a ClassVar:
+ # CV = ClassVar
+ # @dataclass
+ # class C0:
+ # cv0: CV
+
+ # But in this example cv1 will not be detected as a ClassVar:
+ # @dataclass
+ # class C1:
+ # CV = ClassVar
+ # cv1: CV
+
+ # In C1, the code in this function (_is_type) will look up "CV" in
+ # the module and not find it, so it will not consider cv1 as a
+ # ClassVar. This is a fairly obscure corner case, and the best
+ # way to fix it would be to eval() the string "CV" with the
+ # correct global and local namespaces. However that would involve
+ # a eval() penalty for every single field of every dataclass
+ # that's defined. It was judged not worth it.
+
+ match = _MODULE_IDENTIFIER_RE.match(annotation)
+ if match:
+ ns = None
+ module_name = match.group(1)
+ if not module_name:
+ # No module name, assume the class's module did
+ # "from dataclasses import InitVar".
+ ns = sys.modules.get(cls.__module__).__dict__
+ else:
+ # Look up module_name in the class's module.
+ module = sys.modules.get(cls.__module__)
+ if module and module.__dict__.get(module_name) is a_module:
+ ns = sys.modules.get(a_type.__module__).__dict__
+ if ns and is_type_predicate(ns.get(match.group(2)), a_module):
+ return True
+ return False
+
+
+def _get_field(cls, a_name, a_type):
+ # Return a Field object for this field name and type. ClassVars
+ # and InitVars are also returned, but marked as such (see
+ # f._field_type).
+
+ # If the default value isn't derived from Field, then it's only a
+ # normal default value. Convert it to a Field().
+ default = getattr(cls, a_name, MISSING)
+ if isinstance(default, Field):
+ f = default
+ else:
+ if isinstance(default, types.MemberDescriptorType):
+ # This is a field in __slots__, so it has no default value.
+ default = MISSING
+ f = field(default=default)
+
+ # Only at this point do we know the name and the type. Set them.
+ f.name = a_name
+ f.type = a_type
+
+ # Assume it's a normal field until proven otherwise. We're next
+ # going to decide if it's a ClassVar or InitVar, everything else
+ # is just a normal field.
+ f._field_type = _FIELD
+
+ # In addition to checking for actual types here, also check for
+ # string annotations. get_type_hints() won't always work for us
+ # (see https://github.com/python/typing/issues/508 for example),
+ # plus it's expensive and would require an eval for every stirng
+ # annotation. So, make a best effort to see if this is a ClassVar
+ # or InitVar using regex's and checking that the thing referenced
+ # is actually of the correct type.
+
+ # For the complete discussion, see https://bugs.python.org/issue33453
+
+ # If typing has not been imported, then it's impossible for any
+ # annotation to be a ClassVar. So, only look for ClassVar if
+ # typing has been imported by any module (not necessarily cls's
+ # module).
+ typing = sys.modules.get('typing')
+ if typing:
+ if (_is_classvar(a_type, typing)
+ or (isinstance(f.type, str)
+ and _is_type(f.type, cls, typing, typing.ClassVar,
+ _is_classvar))):
+ f._field_type = _FIELD_CLASSVAR
+
+ # If the type is InitVar, or if it's a matching string annotation,
+ # then it's an InitVar.
+ if f._field_type is _FIELD:
+ # The module we're checking against is the module we're
+ # currently in (dataclasses.py).
+ dataclasses = sys.modules[__name__]
+ if (_is_initvar(a_type, dataclasses)
+ or (isinstance(f.type, str)
+ and _is_type(f.type, cls, dataclasses, dataclasses.InitVar,
+ _is_initvar))):
+ f._field_type = _FIELD_INITVAR
+
+ # Validations for individual fields. This is delayed until now,
+ # instead of in the Field() constructor, since only here do we
+ # know the field name, which allows for better error reporting.
+
+ # Special restrictions for ClassVar and InitVar.
+ if f._field_type in (_FIELD_CLASSVAR, _FIELD_INITVAR):
+ if f.default_factory is not MISSING:
+ raise TypeError(f'field {f.name} cannot have a '
+ 'default factory')
+ # Should I check for other field settings? default_factory
+ # seems the most serious to check for. Maybe add others. For
+ # example, how about init=False (or really,
+ # init=)? It makes no sense for
+ # ClassVar and InitVar to specify init=.
+
+ # For real fields, disallow mutable defaults for known types.
+ if f._field_type is _FIELD and isinstance(f.default, (list, dict, set)):
+ raise ValueError(f'mutable default {type(f.default)} for field '
+ f'{f.name} is not allowed: use default_factory')
+
+ return f
+
+
+def _set_new_attribute(cls, name, value):
+ # Never overwrites an existing attribute. Returns True if the
+ # attribute already exists.
+ if name in cls.__dict__:
+ return True
+ setattr(cls, name, value)
+ return False
+
+
+# Decide if/how we're going to create a hash function. Key is
+# (unsafe_hash, eq, frozen, does-hash-exist). Value is the action to
+# take. The common case is to do nothing, so instead of providing a
+# function that is a no-op, use None to signify that.
+
+def _hash_set_none(cls, fields, globals):
+ return None
+
+def _hash_add(cls, fields, globals):
+ flds = [f for f in fields if (f.compare if f.hash is None else f.hash)]
+ return _hash_fn(flds, globals)
+
+def _hash_exception(cls, fields, globals):
+ # Raise an exception.
+ raise TypeError(f'Cannot overwrite attribute __hash__ '
+ f'in class {cls.__name__}')
+
+#
+# +-------------------------------------- unsafe_hash?
+# | +------------------------------- eq?
+# | | +------------------------ frozen?
+# | | | +---------------- has-explicit-hash?
+# | | | |
+# | | | | +------- action
+# | | | | |
+# v v v v v
+_hash_action = {(False, False, False, False): None,
+ (False, False, False, True ): None,
+ (False, False, True, False): None,
+ (False, False, True, True ): None,
+ (False, True, False, False): _hash_set_none,
+ (False, True, False, True ): None,
+ (False, True, True, False): _hash_add,
+ (False, True, True, True ): None,
+ (True, False, False, False): _hash_add,
+ (True, False, False, True ): _hash_exception,
+ (True, False, True, False): _hash_add,
+ (True, False, True, True ): _hash_exception,
+ (True, True, False, False): _hash_add,
+ (True, True, False, True ): _hash_exception,
+ (True, True, True, False): _hash_add,
+ (True, True, True, True ): _hash_exception,
+ }
+# See https://bugs.python.org/issue32929#msg312829 for an if-statement
+# version of this table.
+
+
+def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen):
+ # Now that dicts retain insertion order, there's no reason to use
+ # an ordered dict. I am leveraging that ordering here, because
+ # derived class fields overwrite base class fields, but the order
+ # is defined by the base class, which is found first.
+ fields = {}
+
+ if cls.__module__ in sys.modules:
+ globals = sys.modules[cls.__module__].__dict__
+ else:
+ # Theoretically this can happen if someone writes
+ # a custom string to cls.__module__. In which case
+ # such dataclass won't be fully introspectable
+ # (w.r.t. typing.get_type_hints) but will still function
+ # correctly.
+ globals = {}
+
+ setattr(cls, _PARAMS, _DataclassParams(init, repr, eq, order,
+ unsafe_hash, frozen))
+
+ # Find our base classes in reverse MRO order, and exclude
+ # ourselves. In reversed order so that more derived classes
+ # override earlier field definitions in base classes. As long as
+ # we're iterating over them, see if any are frozen.
+ any_frozen_base = False
+ has_dataclass_bases = False
+ for b in cls.__mro__[-1:0:-1]:
+ # Only process classes that have been processed by our
+ # decorator. That is, they have a _FIELDS attribute.
+ base_fields = getattr(b, _FIELDS, None)
+ if base_fields:
+ has_dataclass_bases = True
+ for f in base_fields.values():
+ fields[f.name] = f
+ if getattr(b, _PARAMS).frozen:
+ any_frozen_base = True
+
+ # Annotations that are defined in this class (not in base
+ # classes). If __annotations__ isn't present, then this class
+ # adds no new annotations. We use this to compute fields that are
+ # added by this class.
+ #
+ # Fields are found from cls_annotations, which is guaranteed to be
+ # ordered. Default values are from class attributes, if a field
+ # has a default. If the default value is a Field(), then it
+ # contains additional info beyond (and possibly including) the
+ # actual default value. Pseudo-fields ClassVars and InitVars are
+ # included, despite the fact that they're not real fields. That's
+ # dealt with later.
+ cls_annotations = cls.__dict__.get('__annotations__', {})
+
+ # Now find fields in our class. While doing so, validate some
+ # things, and set the default values (as class attributes) where
+ # we can.
+ cls_fields = [_get_field(cls, name, type)
+ for name, type in cls_annotations.items()]
+ for f in cls_fields:
+ fields[f.name] = f
+
+ # If the class attribute (which is the default value for this
+ # field) exists and is of type 'Field', replace it with the
+ # real default. This is so that normal class introspection
+ # sees a real default value, not a Field.
+ if isinstance(getattr(cls, f.name, None), Field):
+ if f.default is MISSING:
+ # If there's no default, delete the class attribute.
+ # This happens if we specify field(repr=False), for
+ # example (that is, we specified a field object, but
+ # no default value). Also if we're using a default
+ # factory. The class attribute should not be set at
+ # all in the post-processed class.
+ delattr(cls, f.name)
+ else:
+ setattr(cls, f.name, f.default)
+
+ # Do we have any Field members that don't also have annotations?
+ for name, value in cls.__dict__.items():
+ if isinstance(value, Field) and not name in cls_annotations:
+ raise TypeError(f'{name!r} is a field but has no type annotation')
+
+ # Check rules that apply if we are derived from any dataclasses.
+ if has_dataclass_bases:
+ # Raise an exception if any of our bases are frozen, but we're not.
+ if any_frozen_base and not frozen:
+ raise TypeError('cannot inherit non-frozen dataclass from a '
+ 'frozen one')
+
+ # Raise an exception if we're frozen, but none of our bases are.
+ if not any_frozen_base and frozen:
+ raise TypeError('cannot inherit frozen dataclass from a '
+ 'non-frozen one')
+
+ # Remember all of the fields on our class (including bases). This
+ # also marks this class as being a dataclass.
+ setattr(cls, _FIELDS, fields)
+
+ # Was this class defined with an explicit __hash__? Note that if
+ # __eq__ is defined in this class, then python will automatically
+ # set __hash__ to None. This is a heuristic, as it's possible
+ # that such a __hash__ == None was not auto-generated, but it
+ # close enough.
+ class_hash = cls.__dict__.get('__hash__', MISSING)
+ has_explicit_hash = not (class_hash is MISSING or
+ (class_hash is None and '__eq__' in cls.__dict__))
+
+ # If we're generating ordering methods, we must be generating the
+ # eq methods.
+ if order and not eq:
+ raise ValueError('eq must be true if order is true')
+
+ if init:
+ # Does this class have a post-init function?
+ has_post_init = hasattr(cls, _POST_INIT_NAME)
+
+ # Include InitVars and regular fields (so, not ClassVars).
+ flds = [f for f in fields.values()
+ if f._field_type in (_FIELD, _FIELD_INITVAR)]
+ _set_new_attribute(cls, '__init__',
+ _init_fn(flds,
+ frozen,
+ has_post_init,
+ # The name to use for the "self"
+ # param in __init__. Use "self"
+ # if possible.
+ '__dataclass_self__' if 'self' in fields
+ else 'self',
+ globals,
+ ))
+
+ # Get the fields as a list, and include only real fields. This is
+ # used in all of the following methods.
+ field_list = [f for f in fields.values() if f._field_type is _FIELD]
+
+ if repr:
+ flds = [f for f in field_list if f.repr]
+ _set_new_attribute(cls, '__repr__', _repr_fn(flds, globals))
+
+ if eq:
+ # Create _eq__ method. There's no need for a __ne__ method,
+ # since python will call __eq__ and negate it.
+ flds = [f for f in field_list if f.compare]
+ self_tuple = _tuple_str('self', flds)
+ other_tuple = _tuple_str('other', flds)
+ _set_new_attribute(cls, '__eq__',
+ _cmp_fn('__eq__', '==',
+ self_tuple, other_tuple,
+ globals=globals))
+
+ if order:
+ # Create and set the ordering methods.
+ flds = [f for f in field_list if f.compare]
+ self_tuple = _tuple_str('self', flds)
+ other_tuple = _tuple_str('other', flds)
+ for name, op in [('__lt__', '<'),
+ ('__le__', '<='),
+ ('__gt__', '>'),
+ ('__ge__', '>='),
+ ]:
+ if _set_new_attribute(cls, name,
+ _cmp_fn(name, op, self_tuple, other_tuple,
+ globals=globals)):
+ raise TypeError(f'Cannot overwrite attribute {name} '
+ f'in class {cls.__name__}. Consider using '
+ 'functools.total_ordering')
+
+ if frozen:
+ for fn in _frozen_get_del_attr(cls, field_list, globals):
+ if _set_new_attribute(cls, fn.__name__, fn):
+ raise TypeError(f'Cannot overwrite attribute {fn.__name__} '
+ f'in class {cls.__name__}')
+
+ # Decide if/how we're going to create a hash function.
+ hash_action = _hash_action[bool(unsafe_hash),
+ bool(eq),
+ bool(frozen),
+ has_explicit_hash]
+ if hash_action:
+ # No need to call _set_new_attribute here, since by the time
+ # we're here the overwriting is unconditional.
+ cls.__hash__ = hash_action(cls, field_list, globals)
+
+ if not getattr(cls, '__doc__'):
+ # Create a class doc-string.
+ cls.__doc__ = (cls.__name__ +
+ str(inspect.signature(cls)).replace(' -> None', ''))
+
+ return cls
+
+
+def dataclass(cls=None, /, *, init=True, repr=True, eq=True, order=False,
+ unsafe_hash=False, frozen=False):
+ """Returns the same class as was passed in, with dunder methods
+ added based on the fields defined in the class.
+
+ Examines PEP 526 __annotations__ to determine fields.
+
+ If init is true, an __init__() method is added to the class. If
+ repr is true, a __repr__() method is added. If order is true, rich
+ comparison dunder methods are added. If unsafe_hash is true, a
+ __hash__() method function is added. If frozen is true, fields may
+ not be assigned to after instance creation.
+ """
+
+ def wrap(cls):
+ return _process_class(cls, init, repr, eq, order, unsafe_hash, frozen)
+
+ # See if we're being called as @dataclass or @dataclass().
+ if cls is None:
+ # We're called with parens.
+ return wrap
+
+ # We're called as @dataclass without parens.
+ return wrap(cls)
+
+
+def fields(class_or_instance):
+ """Return a tuple describing the fields of this dataclass.
+
+ Accepts a dataclass or an instance of one. Tuple elements are of
+ type Field.
+ """
+
+ # Might it be worth caching this, per class?
+ try:
+ fields = getattr(class_or_instance, _FIELDS)
+ except AttributeError:
+ raise TypeError('must be called with a dataclass type or instance')
+
+ # Exclude pseudo-fields. Note that fields is sorted by insertion
+ # order, so the order of the tuple is as the fields were defined.
+ return tuple(f for f in fields.values() if f._field_type is _FIELD)
+
+
+def _is_dataclass_instance(obj):
+ """Returns True if obj is an instance of a dataclass."""
+ return hasattr(type(obj), _FIELDS)
+
+
+def is_dataclass(obj):
+ """Returns True if obj is a dataclass or an instance of a
+ dataclass."""
+ cls = obj if isinstance(obj, type) else type(obj)
+ return hasattr(cls, _FIELDS)
+
+
+def asdict(obj, *, dict_factory=dict):
+ """Return the fields of a dataclass instance as a new dictionary mapping
+ field names to field values.
+
+ Example usage:
+
+ @dataclass
+ class C:
+ x: int
+ y: int
+
+ c = C(1, 2)
+ assert asdict(c) == {'x': 1, 'y': 2}
+
+ If given, 'dict_factory' will be used instead of built-in dict.
+ The function applies recursively to field values that are
+ dataclass instances. This will also look into built-in containers:
+ tuples, lists, and dicts.
+ """
+ if not _is_dataclass_instance(obj):
+ raise TypeError("asdict() should be called on dataclass instances")
+ return _asdict_inner(obj, dict_factory)
+
+
+def _asdict_inner(obj, dict_factory):
+ if _is_dataclass_instance(obj):
+ result = []
+ for f in fields(obj):
+ value = _asdict_inner(getattr(obj, f.name), dict_factory)
+ result.append((f.name, value))
+ return dict_factory(result)
+ elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
+ # obj is a namedtuple. Recurse into it, but the returned
+ # object is another namedtuple of the same type. This is
+ # similar to how other list- or tuple-derived classes are
+ # treated (see below), but we just need to create them
+ # differently because a namedtuple's __init__ needs to be
+ # called differently (see bpo-34363).
+
+ # I'm not using namedtuple's _asdict()
+ # method, because:
+ # - it does not recurse in to the namedtuple fields and
+ # convert them to dicts (using dict_factory).
+ # - I don't actually want to return a dict here. The main
+ # use case here is json.dumps, and it handles converting
+ # namedtuples to lists. Admittedly we're losing some
+ # information here when we produce a json list instead of a
+ # dict. Note that if we returned dicts here instead of
+ # namedtuples, we could no longer call asdict() on a data
+ # structure where a namedtuple was used as a dict key.
+
+ return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj])
+ elif isinstance(obj, (list, tuple)):
+ # Assume we can create an object of this type by passing in a
+ # generator (which is not true for namedtuples, handled
+ # above).
+ return type(obj)(_asdict_inner(v, dict_factory) for v in obj)
+ elif isinstance(obj, dict):
+ return type(obj)((_asdict_inner(k, dict_factory),
+ _asdict_inner(v, dict_factory))
+ for k, v in obj.items())
+ else:
+ return copy.deepcopy(obj)
+
+
+def astuple(obj, *, tuple_factory=tuple):
+ """Return the fields of a dataclass instance as a new tuple of field values.
+
+ Example usage::
+
+ @dataclass
+ class C:
+ x: int
+ y: int
+
+ c = C(1, 2)
+ assert astuple(c) == (1, 2)
+
+ If given, 'tuple_factory' will be used instead of built-in tuple.
+ The function applies recursively to field values that are
+ dataclass instances. This will also look into built-in containers:
+ tuples, lists, and dicts.
+ """
+
+ if not _is_dataclass_instance(obj):
+ raise TypeError("astuple() should be called on dataclass instances")
+ return _astuple_inner(obj, tuple_factory)
+
+
+def _astuple_inner(obj, tuple_factory):
+ if _is_dataclass_instance(obj):
+ result = []
+ for f in fields(obj):
+ value = _astuple_inner(getattr(obj, f.name), tuple_factory)
+ result.append(value)
+ return tuple_factory(result)
+ elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
+ # obj is a namedtuple. Recurse into it, but the returned
+ # object is another namedtuple of the same type. This is
+ # similar to how other list- or tuple-derived classes are
+ # treated (see below), but we just need to create them
+ # differently because a namedtuple's __init__ needs to be
+ # called differently (see bpo-34363).
+ return type(obj)(*[_astuple_inner(v, tuple_factory) for v in obj])
+ elif isinstance(obj, (list, tuple)):
+ # Assume we can create an object of this type by passing in a
+ # generator (which is not true for namedtuples, handled
+ # above).
+ return type(obj)(_astuple_inner(v, tuple_factory) for v in obj)
+ elif isinstance(obj, dict):
+ return type(obj)((_astuple_inner(k, tuple_factory), _astuple_inner(v, tuple_factory))
+ for k, v in obj.items())
+ else:
+ return copy.deepcopy(obj)
+
+
+def make_dataclass(cls_name, fields, *, bases=(), namespace=None, init=True,
+ repr=True, eq=True, order=False, unsafe_hash=False,
+ frozen=False):
+ """Return a new dynamically created dataclass.
+
+ The dataclass name will be 'cls_name'. 'fields' is an iterable
+ of either (name), (name, type) or (name, type, Field) objects. If type is
+ omitted, use the string 'typing.Any'. Field objects are created by
+ the equivalent of calling 'field(name, type [, Field-info])'.
+
+ C = make_dataclass('C', ['x', ('y', int), ('z', int, field(init=False))], bases=(Base,))
+
+ is equivalent to:
+
+ @dataclass
+ class C(Base):
+ x: 'typing.Any'
+ y: int
+ z: int = field(init=False)
+
+ For the bases and namespace parameters, see the builtin type() function.
+
+ The parameters init, repr, eq, order, unsafe_hash, and frozen are passed to
+ dataclass().
+ """
+
+ if namespace is None:
+ namespace = {}
+ else:
+ # Copy namespace since we're going to mutate it.
+ namespace = namespace.copy()
+
+ # While we're looking through the field names, validate that they
+ # are identifiers, are not keywords, and not duplicates.
+ seen = set()
+ anns = {}
+ for item in fields:
+ if isinstance(item, str):
+ name = item
+ tp = 'typing.Any'
+ elif len(item) == 2:
+ name, tp, = item
+ elif len(item) == 3:
+ name, tp, spec = item
+ namespace[name] = spec
+ else:
+ raise TypeError(f'Invalid field: {item!r}')
+
+ if not isinstance(name, str) or not name.isidentifier():
+ raise TypeError(f'Field names must be valid identifiers: {name!r}')
+ if keyword.iskeyword(name):
+ raise TypeError(f'Field names must not be keywords: {name!r}')
+ if name in seen:
+ raise TypeError(f'Field name duplicated: {name!r}')
+
+ seen.add(name)
+ anns[name] = tp
+
+ namespace['__annotations__'] = anns
+ # We use `types.new_class()` instead of simply `type()` to allow dynamic creation
+ # of generic dataclassses.
+ cls = types.new_class(cls_name, bases, {}, lambda ns: ns.update(namespace))
+ return dataclass(cls, init=init, repr=repr, eq=eq, order=order,
+ unsafe_hash=unsafe_hash, frozen=frozen)
+
+
+def replace(*args, **changes):
+ """Return a new object replacing specified fields with new values.
+
+ This is especially useful for frozen classes. Example usage:
+
+ @dataclass(frozen=True)
+ class C:
+ x: int
+ y: int
+
+ c = C(1, 2)
+ c1 = replace(c, x=3)
+ assert c1.x == 3 and c1.y == 2
+ """
+ if len(args) > 1:
+ raise TypeError(f'replace() takes 1 positional argument but {len(args)} were given')
+ if args:
+ obj, = args
+ elif 'obj' in changes:
+ obj = changes.pop('obj')
+ import warnings
+ warnings.warn("Passing 'obj' as keyword argument is deprecated",
+ DeprecationWarning, stacklevel=2)
+ else:
+ raise TypeError("replace() missing 1 required positional argument: 'obj'")
+
+ # We're going to mutate 'changes', but that's okay because it's a
+ # new dict, even if called with 'replace(obj, **my_changes)'.
+
+ if not _is_dataclass_instance(obj):
+ raise TypeError("replace() should be called on dataclass instances")
+
+ # It's an error to have init=False fields in 'changes'.
+ # If a field is not in 'changes', read its value from the provided obj.
+
+ for f in getattr(obj, _FIELDS).values():
+ # Only consider normal fields or InitVars.
+ if f._field_type is _FIELD_CLASSVAR:
+ continue
+
+ if not f.init:
+ # Error if this field is specified in changes.
+ if f.name in changes:
+ raise ValueError(f'field {f.name} is declared with '
+ 'init=False, it cannot be specified with '
+ 'replace()')
+ continue
+
+ if f.name not in changes:
+ if f._field_type is _FIELD_INITVAR:
+ raise ValueError(f"InitVar {f.name!r} "
+ 'must be specified with replace()')
+ changes[f.name] = getattr(obj, f.name)
+
+ # Create the new object, which calls __init__() and
+ # __post_init__() (if defined), using all of the init fields we've
+ # added and/or left in 'changes'. If there are values supplied in
+ # changes that aren't fields, this will correctly raise a
+ # TypeError.
+ return obj.__class__(**changes)
+replace.__text_signature__ = '(obj, /, **kwargs)'
diff --git a/dist/lib/datetime.py b/dist/lib/datetime.py
new file mode 100644
index 0000000..0adf1dd
--- /dev/null
+++ b/dist/lib/datetime.py
@@ -0,0 +1,2518 @@
+"""Concrete date/time and related types.
+
+See http://www.iana.org/time-zones/repository/tz-link.html for
+time zone and DST data sources.
+"""
+
+import time as _time
+import math as _math
+import sys
+
+def _cmp(x, y):
+ return 0 if x == y else 1 if x > y else -1
+
+MINYEAR = 1
+MAXYEAR = 9999
+_MAXORDINAL = 3652059 # date.max.toordinal()
+
+# Utility functions, adapted from Python's Demo/classes/Dates.py, which
+# also assumes the current Gregorian calendar indefinitely extended in
+# both directions. Difference: Dates.py calls January 1 of year 0 day
+# number 1. The code here calls January 1 of year 1 day number 1. This is
+# to match the definition of the "proleptic Gregorian" calendar in Dershowitz
+# and Reingold's "Calendrical Calculations", where it's the base calendar
+# for all computations. See the book for algorithms for converting between
+# proleptic Gregorian ordinals and many other calendar systems.
+
+# -1 is a placeholder for indexing purposes.
+_DAYS_IN_MONTH = [-1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
+
+_DAYS_BEFORE_MONTH = [-1] # -1 is a placeholder for indexing purposes.
+dbm = 0
+for dim in _DAYS_IN_MONTH[1:]:
+ _DAYS_BEFORE_MONTH.append(dbm)
+ dbm += dim
+del dbm, dim
+
+def _is_leap(year):
+ "year -> 1 if leap year, else 0."
+ return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)
+
+def _days_before_year(year):
+ "year -> number of days before January 1st of year."
+ y = year - 1
+ return y*365 + y//4 - y//100 + y//400
+
+def _days_in_month(year, month):
+ "year, month -> number of days in that month in that year."
+ assert 1 <= month <= 12, month
+ if month == 2 and _is_leap(year):
+ return 29
+ return _DAYS_IN_MONTH[month]
+
+def _days_before_month(year, month):
+ "year, month -> number of days in year preceding first day of month."
+ assert 1 <= month <= 12, 'month must be in 1..12'
+ return _DAYS_BEFORE_MONTH[month] + (month > 2 and _is_leap(year))
+
+def _ymd2ord(year, month, day):
+ "year, month, day -> ordinal, considering 01-Jan-0001 as day 1."
+ assert 1 <= month <= 12, 'month must be in 1..12'
+ dim = _days_in_month(year, month)
+ assert 1 <= day <= dim, ('day must be in 1..%d' % dim)
+ return (_days_before_year(year) +
+ _days_before_month(year, month) +
+ day)
+
+_DI400Y = _days_before_year(401) # number of days in 400 years
+_DI100Y = _days_before_year(101) # " " " " 100 "
+_DI4Y = _days_before_year(5) # " " " " 4 "
+
+# A 4-year cycle has an extra leap day over what we'd get from pasting
+# together 4 single years.
+assert _DI4Y == 4 * 365 + 1
+
+# Similarly, a 400-year cycle has an extra leap day over what we'd get from
+# pasting together 4 100-year cycles.
+assert _DI400Y == 4 * _DI100Y + 1
+
+# OTOH, a 100-year cycle has one fewer leap day than we'd get from
+# pasting together 25 4-year cycles.
+assert _DI100Y == 25 * _DI4Y - 1
+
+def _ord2ymd(n):
+ "ordinal -> (year, month, day), considering 01-Jan-0001 as day 1."
+
+ # n is a 1-based index, starting at 1-Jan-1. The pattern of leap years
+ # repeats exactly every 400 years. The basic strategy is to find the
+ # closest 400-year boundary at or before n, then work with the offset
+ # from that boundary to n. Life is much clearer if we subtract 1 from
+ # n first -- then the values of n at 400-year boundaries are exactly
+ # those divisible by _DI400Y:
+ #
+ # D M Y n n-1
+ # -- --- ---- ---------- ----------------
+ # 31 Dec -400 -_DI400Y -_DI400Y -1
+ # 1 Jan -399 -_DI400Y +1 -_DI400Y 400-year boundary
+ # ...
+ # 30 Dec 000 -1 -2
+ # 31 Dec 000 0 -1
+ # 1 Jan 001 1 0 400-year boundary
+ # 2 Jan 001 2 1
+ # 3 Jan 001 3 2
+ # ...
+ # 31 Dec 400 _DI400Y _DI400Y -1
+ # 1 Jan 401 _DI400Y +1 _DI400Y 400-year boundary
+ n -= 1
+ n400, n = divmod(n, _DI400Y)
+ year = n400 * 400 + 1 # ..., -399, 1, 401, ...
+
+ # Now n is the (non-negative) offset, in days, from January 1 of year, to
+ # the desired date. Now compute how many 100-year cycles precede n.
+ # Note that it's possible for n100 to equal 4! In that case 4 full
+ # 100-year cycles precede the desired day, which implies the desired
+ # day is December 31 at the end of a 400-year cycle.
+ n100, n = divmod(n, _DI100Y)
+
+ # Now compute how many 4-year cycles precede it.
+ n4, n = divmod(n, _DI4Y)
+
+ # And now how many single years. Again n1 can be 4, and again meaning
+ # that the desired day is December 31 at the end of the 4-year cycle.
+ n1, n = divmod(n, 365)
+
+ year += n100 * 100 + n4 * 4 + n1
+ if n1 == 4 or n100 == 4:
+ assert n == 0
+ return year-1, 12, 31
+
+ # Now the year is correct, and n is the offset from January 1. We find
+ # the month via an estimate that's either exact or one too large.
+ leapyear = n1 == 3 and (n4 != 24 or n100 == 3)
+ assert leapyear == _is_leap(year)
+ month = (n + 50) >> 5
+ preceding = _DAYS_BEFORE_MONTH[month] + (month > 2 and leapyear)
+ if preceding > n: # estimate is too large
+ month -= 1
+ preceding -= _DAYS_IN_MONTH[month] + (month == 2 and leapyear)
+ n -= preceding
+ assert 0 <= n < _days_in_month(year, month)
+
+ # Now the year and month are correct, and n is the offset from the
+ # start of that month: we're done!
+ return year, month, n+1
+
+# Month and day names. For localized versions, see the calendar module.
+_MONTHNAMES = [None, "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
+_DAYNAMES = [None, "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
+
+
+def _build_struct_time(y, m, d, hh, mm, ss, dstflag):
+ wday = (_ymd2ord(y, m, d) + 6) % 7
+ dnum = _days_before_month(y, m) + d
+ return _time.struct_time((y, m, d, hh, mm, ss, wday, dnum, dstflag))
+
+def _format_time(hh, mm, ss, us, timespec='auto'):
+ specs = {
+ 'hours': '{:02d}',
+ 'minutes': '{:02d}:{:02d}',
+ 'seconds': '{:02d}:{:02d}:{:02d}',
+ 'milliseconds': '{:02d}:{:02d}:{:02d}.{:03d}',
+ 'microseconds': '{:02d}:{:02d}:{:02d}.{:06d}'
+ }
+
+ if timespec == 'auto':
+ # Skip trailing microseconds when us==0.
+ timespec = 'microseconds' if us else 'seconds'
+ elif timespec == 'milliseconds':
+ us //= 1000
+ try:
+ fmt = specs[timespec]
+ except KeyError:
+ raise ValueError('Unknown timespec value')
+ else:
+ return fmt.format(hh, mm, ss, us)
+
+def _format_offset(off):
+ s = ''
+ if off is not None:
+ if off.days < 0:
+ sign = "-"
+ off = -off
+ else:
+ sign = "+"
+ hh, mm = divmod(off, timedelta(hours=1))
+ mm, ss = divmod(mm, timedelta(minutes=1))
+ s += "%s%02d:%02d" % (sign, hh, mm)
+ if ss or ss.microseconds:
+ s += ":%02d" % ss.seconds
+
+ if ss.microseconds:
+ s += '.%06d' % ss.microseconds
+ return s
+
+# Correctly substitute for %z and %Z escapes in strftime formats.
+def _wrap_strftime(object, format, timetuple):
+ # Don't call utcoffset() or tzname() unless actually needed.
+ freplace = None # the string to use for %f
+ zreplace = None # the string to use for %z
+ Zreplace = None # the string to use for %Z
+
+ # Scan format for %z and %Z escapes, replacing as needed.
+ newformat = []
+ push = newformat.append
+ i, n = 0, len(format)
+ while i < n:
+ ch = format[i]
+ i += 1
+ if ch == '%':
+ if i < n:
+ ch = format[i]
+ i += 1
+ if ch == 'f':
+ if freplace is None:
+ freplace = '%06d' % getattr(object,
+ 'microsecond', 0)
+ newformat.append(freplace)
+ elif ch == 'z':
+ if zreplace is None:
+ zreplace = ""
+ if hasattr(object, "utcoffset"):
+ offset = object.utcoffset()
+ if offset is not None:
+ sign = '+'
+ if offset.days < 0:
+ offset = -offset
+ sign = '-'
+ h, rest = divmod(offset, timedelta(hours=1))
+ m, rest = divmod(rest, timedelta(minutes=1))
+ s = rest.seconds
+ u = offset.microseconds
+ if u:
+ zreplace = '%c%02d%02d%02d.%06d' % (sign, h, m, s, u)
+ elif s:
+ zreplace = '%c%02d%02d%02d' % (sign, h, m, s)
+ else:
+ zreplace = '%c%02d%02d' % (sign, h, m)
+ assert '%' not in zreplace
+ newformat.append(zreplace)
+ elif ch == 'Z':
+ if Zreplace is None:
+ Zreplace = ""
+ if hasattr(object, "tzname"):
+ s = object.tzname()
+ if s is not None:
+ # strftime is going to have at this: escape %
+ Zreplace = s.replace('%', '%%')
+ newformat.append(Zreplace)
+ else:
+ push('%')
+ push(ch)
+ else:
+ push('%')
+ else:
+ push(ch)
+ newformat = "".join(newformat)
+ return _time.strftime(newformat, timetuple)
+
+# Helpers for parsing the result of isoformat()
+def _parse_isoformat_date(dtstr):
+ # It is assumed that this function will only be called with a
+ # string of length exactly 10, and (though this is not used) ASCII-only
+ year = int(dtstr[0:4])
+ if dtstr[4] != '-':
+ raise ValueError('Invalid date separator: %s' % dtstr[4])
+
+ month = int(dtstr[5:7])
+
+ if dtstr[7] != '-':
+ raise ValueError('Invalid date separator')
+
+ day = int(dtstr[8:10])
+
+ return [year, month, day]
+
+def _parse_hh_mm_ss_ff(tstr):
+ # Parses things of the form HH[:MM[:SS[.fff[fff]]]]
+ len_str = len(tstr)
+
+ time_comps = [0, 0, 0, 0]
+ pos = 0
+ for comp in range(0, 3):
+ if (len_str - pos) < 2:
+ raise ValueError('Incomplete time component')
+
+ time_comps[comp] = int(tstr[pos:pos+2])
+
+ pos += 2
+ next_char = tstr[pos:pos+1]
+
+ if not next_char or comp >= 2:
+ break
+
+ if next_char != ':':
+ raise ValueError('Invalid time separator: %c' % next_char)
+
+ pos += 1
+
+ if pos < len_str:
+ if tstr[pos] != '.':
+ raise ValueError('Invalid microsecond component')
+ else:
+ pos += 1
+
+ len_remainder = len_str - pos
+ if len_remainder not in (3, 6):
+ raise ValueError('Invalid microsecond component')
+
+ time_comps[3] = int(tstr[pos:])
+ if len_remainder == 3:
+ time_comps[3] *= 1000
+
+ return time_comps
+
+def _parse_isoformat_time(tstr):
+ # Format supported is HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]]
+ len_str = len(tstr)
+ if len_str < 2:
+ raise ValueError('Isoformat time too short')
+
+ # This is equivalent to re.search('[+-]', tstr), but faster
+ tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1)
+ timestr = tstr[:tz_pos-1] if tz_pos > 0 else tstr
+
+ time_comps = _parse_hh_mm_ss_ff(timestr)
+
+ tzi = None
+ if tz_pos > 0:
+ tzstr = tstr[tz_pos:]
+
+ # Valid time zone strings are:
+ # HH:MM len: 5
+ # HH:MM:SS len: 8
+ # HH:MM:SS.ffffff len: 15
+
+ if len(tzstr) not in (5, 8, 15):
+ raise ValueError('Malformed time zone string')
+
+ tz_comps = _parse_hh_mm_ss_ff(tzstr)
+ if all(x == 0 for x in tz_comps):
+ tzi = timezone.utc
+ else:
+ tzsign = -1 if tstr[tz_pos - 1] == '-' else 1
+
+ td = timedelta(hours=tz_comps[0], minutes=tz_comps[1],
+ seconds=tz_comps[2], microseconds=tz_comps[3])
+
+ tzi = timezone(tzsign * td)
+
+ time_comps.append(tzi)
+
+ return time_comps
+
+
+# Just raise TypeError if the arg isn't None or a string.
+def _check_tzname(name):
+ if name is not None and not isinstance(name, str):
+ raise TypeError("tzinfo.tzname() must return None or string, "
+ "not '%s'" % type(name))
+
+# name is the offset-producing method, "utcoffset" or "dst".
+# offset is what it returned.
+# If offset isn't None or timedelta, raises TypeError.
+# If offset is None, returns None.
+# Else offset is checked for being in range.
+# If it is, its integer value is returned. Else ValueError is raised.
+def _check_utc_offset(name, offset):
+ assert name in ("utcoffset", "dst")
+ if offset is None:
+ return
+ if not isinstance(offset, timedelta):
+ raise TypeError("tzinfo.%s() must return None "
+ "or timedelta, not '%s'" % (name, type(offset)))
+ if not -timedelta(1) < offset < timedelta(1):
+ raise ValueError("%s()=%s, must be strictly between "
+ "-timedelta(hours=24) and timedelta(hours=24)" %
+ (name, offset))
+
+def _check_int_field(value):
+ if isinstance(value, int):
+ return value
+ if isinstance(value, float):
+ raise TypeError('integer argument expected, got float')
+ try:
+ value = value.__index__()
+ except AttributeError:
+ pass
+ else:
+ if not isinstance(value, int):
+ raise TypeError('__index__ returned non-int (type %s)' %
+ type(value).__name__)
+ return value
+ orig = value
+ try:
+ value = value.__int__()
+ except AttributeError:
+ pass
+ else:
+ if not isinstance(value, int):
+ raise TypeError('__int__ returned non-int (type %s)' %
+ type(value).__name__)
+ import warnings
+ warnings.warn("an integer is required (got type %s)" %
+ type(orig).__name__,
+ DeprecationWarning,
+ stacklevel=2)
+ return value
+ raise TypeError('an integer is required (got type %s)' %
+ type(value).__name__)
+
+def _check_date_fields(year, month, day):
+ year = _check_int_field(year)
+ month = _check_int_field(month)
+ day = _check_int_field(day)
+ if not MINYEAR <= year <= MAXYEAR:
+ raise ValueError('year must be in %d..%d' % (MINYEAR, MAXYEAR), year)
+ if not 1 <= month <= 12:
+ raise ValueError('month must be in 1..12', month)
+ dim = _days_in_month(year, month)
+ if not 1 <= day <= dim:
+ raise ValueError('day must be in 1..%d' % dim, day)
+ return year, month, day
+
+def _check_time_fields(hour, minute, second, microsecond, fold):
+ hour = _check_int_field(hour)
+ minute = _check_int_field(minute)
+ second = _check_int_field(second)
+ microsecond = _check_int_field(microsecond)
+ if not 0 <= hour <= 23:
+ raise ValueError('hour must be in 0..23', hour)
+ if not 0 <= minute <= 59:
+ raise ValueError('minute must be in 0..59', minute)
+ if not 0 <= second <= 59:
+ raise ValueError('second must be in 0..59', second)
+ if not 0 <= microsecond <= 999999:
+ raise ValueError('microsecond must be in 0..999999', microsecond)
+ if fold not in (0, 1):
+ raise ValueError('fold must be either 0 or 1', fold)
+ return hour, minute, second, microsecond, fold
+
+def _check_tzinfo_arg(tz):
+ if tz is not None and not isinstance(tz, tzinfo):
+ raise TypeError("tzinfo argument must be None or of a tzinfo subclass")
+
+def _cmperror(x, y):
+ raise TypeError("can't compare '%s' to '%s'" % (
+ type(x).__name__, type(y).__name__))
+
+def _divide_and_round(a, b):
+ """divide a by b and round result to the nearest integer
+
+ When the ratio is exactly half-way between two integers,
+ the even integer is returned.
+ """
+ # Based on the reference implementation for divmod_near
+ # in Objects/longobject.c.
+ q, r = divmod(a, b)
+ # round up if either r / b > 0.5, or r / b == 0.5 and q is odd.
+ # The expression r / b > 0.5 is equivalent to 2 * r > b if b is
+ # positive, 2 * r < b if b negative.
+ r *= 2
+ greater_than_half = r > b if b > 0 else r < b
+ if greater_than_half or r == b and q % 2 == 1:
+ q += 1
+
+ return q
+
+
+class timedelta:
+ """Represent the difference between two datetime objects.
+
+ Supported operators:
+
+ - add, subtract timedelta
+ - unary plus, minus, abs
+ - compare to timedelta
+ - multiply, divide by int
+
+ In addition, datetime supports subtraction of two datetime objects
+ returning a timedelta, and addition or subtraction of a datetime
+ and a timedelta giving a datetime.
+
+ Representation: (days, seconds, microseconds). Why? Because I
+ felt like it.
+ """
+ __slots__ = '_days', '_seconds', '_microseconds', '_hashcode'
+
+ def __new__(cls, days=0, seconds=0, microseconds=0,
+ milliseconds=0, minutes=0, hours=0, weeks=0):
+ # Doing this efficiently and accurately in C is going to be difficult
+ # and error-prone, due to ubiquitous overflow possibilities, and that
+ # C double doesn't have enough bits of precision to represent
+ # microseconds over 10K years faithfully. The code here tries to make
+ # explicit where go-fast assumptions can be relied on, in order to
+ # guide the C implementation; it's way more convoluted than speed-
+ # ignoring auto-overflow-to-long idiomatic Python could be.
+
+ # XXX Check that all inputs are ints or floats.
+
+ # Final values, all integer.
+ # s and us fit in 32-bit signed ints; d isn't bounded.
+ d = s = us = 0
+
+ # Normalize everything to days, seconds, microseconds.
+ days += weeks*7
+ seconds += minutes*60 + hours*3600
+ microseconds += milliseconds*1000
+
+ # Get rid of all fractions, and normalize s and us.
+ # Take a deep breath .
+ if isinstance(days, float):
+ dayfrac, days = _math.modf(days)
+ daysecondsfrac, daysecondswhole = _math.modf(dayfrac * (24.*3600.))
+ assert daysecondswhole == int(daysecondswhole) # can't overflow
+ s = int(daysecondswhole)
+ assert days == int(days)
+ d = int(days)
+ else:
+ daysecondsfrac = 0.0
+ d = days
+ assert isinstance(daysecondsfrac, float)
+ assert abs(daysecondsfrac) <= 1.0
+ assert isinstance(d, int)
+ assert abs(s) <= 24 * 3600
+ # days isn't referenced again before redefinition
+
+ if isinstance(seconds, float):
+ secondsfrac, seconds = _math.modf(seconds)
+ assert seconds == int(seconds)
+ seconds = int(seconds)
+ secondsfrac += daysecondsfrac
+ assert abs(secondsfrac) <= 2.0
+ else:
+ secondsfrac = daysecondsfrac
+ # daysecondsfrac isn't referenced again
+ assert isinstance(secondsfrac, float)
+ assert abs(secondsfrac) <= 2.0
+
+ assert isinstance(seconds, int)
+ days, seconds = divmod(seconds, 24*3600)
+ d += days
+ s += int(seconds) # can't overflow
+ assert isinstance(s, int)
+ assert abs(s) <= 2 * 24 * 3600
+ # seconds isn't referenced again before redefinition
+
+ usdouble = secondsfrac * 1e6
+ assert abs(usdouble) < 2.1e6 # exact value not critical
+ # secondsfrac isn't referenced again
+
+ if isinstance(microseconds, float):
+ microseconds = round(microseconds + usdouble)
+ seconds, microseconds = divmod(microseconds, 1000000)
+ days, seconds = divmod(seconds, 24*3600)
+ d += days
+ s += seconds
+ else:
+ microseconds = int(microseconds)
+ seconds, microseconds = divmod(microseconds, 1000000)
+ days, seconds = divmod(seconds, 24*3600)
+ d += days
+ s += seconds
+ microseconds = round(microseconds + usdouble)
+ assert isinstance(s, int)
+ assert isinstance(microseconds, int)
+ assert abs(s) <= 3 * 24 * 3600
+ assert abs(microseconds) < 3.1e6
+
+ # Just a little bit of carrying possible for microseconds and seconds.
+ seconds, us = divmod(microseconds, 1000000)
+ s += seconds
+ days, s = divmod(s, 24*3600)
+ d += days
+
+ assert isinstance(d, int)
+ assert isinstance(s, int) and 0 <= s < 24*3600
+ assert isinstance(us, int) and 0 <= us < 1000000
+
+ if abs(d) > 999999999:
+ raise OverflowError("timedelta # of days is too large: %d" % d)
+
+ self = object.__new__(cls)
+ self._days = d
+ self._seconds = s
+ self._microseconds = us
+ self._hashcode = -1
+ return self
+
+ def __repr__(self):
+ args = []
+ if self._days:
+ args.append("days=%d" % self._days)
+ if self._seconds:
+ args.append("seconds=%d" % self._seconds)
+ if self._microseconds:
+ args.append("microseconds=%d" % self._microseconds)
+ if not args:
+ args.append('0')
+ return "%s.%s(%s)" % (self.__class__.__module__,
+ self.__class__.__qualname__,
+ ', '.join(args))
+
+ def __str__(self):
+ mm, ss = divmod(self._seconds, 60)
+ hh, mm = divmod(mm, 60)
+ s = "%d:%02d:%02d" % (hh, mm, ss)
+ if self._days:
+ def plural(n):
+ return n, abs(n) != 1 and "s" or ""
+ s = ("%d day%s, " % plural(self._days)) + s
+ if self._microseconds:
+ s = s + ".%06d" % self._microseconds
+ return s
+
+ def total_seconds(self):
+ """Total seconds in the duration."""
+ return ((self.days * 86400 + self.seconds) * 10**6 +
+ self.microseconds) / 10**6
+
+ # Read-only field accessors
+ @property
+ def days(self):
+ """days"""
+ return self._days
+
+ @property
+ def seconds(self):
+ """seconds"""
+ return self._seconds
+
+ @property
+ def microseconds(self):
+ """microseconds"""
+ return self._microseconds
+
+ def __add__(self, other):
+ if isinstance(other, timedelta):
+ # for CPython compatibility, we cannot use
+ # our __class__ here, but need a real timedelta
+ return timedelta(self._days + other._days,
+ self._seconds + other._seconds,
+ self._microseconds + other._microseconds)
+ return NotImplemented
+
+ __radd__ = __add__
+
+ def __sub__(self, other):
+ if isinstance(other, timedelta):
+ # for CPython compatibility, we cannot use
+ # our __class__ here, but need a real timedelta
+ return timedelta(self._days - other._days,
+ self._seconds - other._seconds,
+ self._microseconds - other._microseconds)
+ return NotImplemented
+
+ def __rsub__(self, other):
+ if isinstance(other, timedelta):
+ return -self + other
+ return NotImplemented
+
+ def __neg__(self):
+ # for CPython compatibility, we cannot use
+ # our __class__ here, but need a real timedelta
+ return timedelta(-self._days,
+ -self._seconds,
+ -self._microseconds)
+
+ def __pos__(self):
+ return self
+
+ def __abs__(self):
+ if self._days < 0:
+ return -self
+ else:
+ return self
+
+ def __mul__(self, other):
+ if isinstance(other, int):
+ # for CPython compatibility, we cannot use
+ # our __class__ here, but need a real timedelta
+ return timedelta(self._days * other,
+ self._seconds * other,
+ self._microseconds * other)
+ if isinstance(other, float):
+ usec = self._to_microseconds()
+ a, b = other.as_integer_ratio()
+ return timedelta(0, 0, _divide_and_round(usec * a, b))
+ return NotImplemented
+
+ __rmul__ = __mul__
+
+ def _to_microseconds(self):
+ return ((self._days * (24*3600) + self._seconds) * 1000000 +
+ self._microseconds)
+
+ def __floordiv__(self, other):
+ if not isinstance(other, (int, timedelta)):
+ return NotImplemented
+ usec = self._to_microseconds()
+ if isinstance(other, timedelta):
+ return usec // other._to_microseconds()
+ if isinstance(other, int):
+ return timedelta(0, 0, usec // other)
+
+ def __truediv__(self, other):
+ if not isinstance(other, (int, float, timedelta)):
+ return NotImplemented
+ usec = self._to_microseconds()
+ if isinstance(other, timedelta):
+ return usec / other._to_microseconds()
+ if isinstance(other, int):
+ return timedelta(0, 0, _divide_and_round(usec, other))
+ if isinstance(other, float):
+ a, b = other.as_integer_ratio()
+ return timedelta(0, 0, _divide_and_round(b * usec, a))
+
+ def __mod__(self, other):
+ if isinstance(other, timedelta):
+ r = self._to_microseconds() % other._to_microseconds()
+ return timedelta(0, 0, r)
+ return NotImplemented
+
+ def __divmod__(self, other):
+ if isinstance(other, timedelta):
+ q, r = divmod(self._to_microseconds(),
+ other._to_microseconds())
+ return q, timedelta(0, 0, r)
+ return NotImplemented
+
+ # Comparisons of timedelta objects with other.
+
+ def __eq__(self, other):
+ if isinstance(other, timedelta):
+ return self._cmp(other) == 0
+ else:
+ return NotImplemented
+
+ def __le__(self, other):
+ if isinstance(other, timedelta):
+ return self._cmp(other) <= 0
+ else:
+ return NotImplemented
+
+ def __lt__(self, other):
+ if isinstance(other, timedelta):
+ return self._cmp(other) < 0
+ else:
+ return NotImplemented
+
+ def __ge__(self, other):
+ if isinstance(other, timedelta):
+ return self._cmp(other) >= 0
+ else:
+ return NotImplemented
+
+ def __gt__(self, other):
+ if isinstance(other, timedelta):
+ return self._cmp(other) > 0
+ else:
+ return NotImplemented
+
+ def _cmp(self, other):
+ assert isinstance(other, timedelta)
+ return _cmp(self._getstate(), other._getstate())
+
+ def __hash__(self):
+ if self._hashcode == -1:
+ self._hashcode = hash(self._getstate())
+ return self._hashcode
+
+ def __bool__(self):
+ return (self._days != 0 or
+ self._seconds != 0 or
+ self._microseconds != 0)
+
+ # Pickle support.
+
+ def _getstate(self):
+ return (self._days, self._seconds, self._microseconds)
+
+ def __reduce__(self):
+ return (self.__class__, self._getstate())
+
+timedelta.min = timedelta(-999999999)
+timedelta.max = timedelta(days=999999999, hours=23, minutes=59, seconds=59,
+ microseconds=999999)
+timedelta.resolution = timedelta(microseconds=1)
+
+class date:
+ """Concrete date type.
+
+ Constructors:
+
+ __new__()
+ fromtimestamp()
+ today()
+ fromordinal()
+
+ Operators:
+
+ __repr__, __str__
+ __eq__, __le__, __lt__, __ge__, __gt__, __hash__
+ __add__, __radd__, __sub__ (add/radd only with timedelta arg)
+
+ Methods:
+
+ timetuple()
+ toordinal()
+ weekday()
+ isoweekday(), isocalendar(), isoformat()
+ ctime()
+ strftime()
+
+ Properties (readonly):
+ year, month, day
+ """
+ __slots__ = '_year', '_month', '_day', '_hashcode'
+
+ def __new__(cls, year, month=None, day=None):
+ """Constructor.
+
+ Arguments:
+
+ year, month, day (required, base 1)
+ """
+ if (month is None and
+ isinstance(year, (bytes, str)) and len(year) == 4 and
+ 1 <= ord(year[2:3]) <= 12):
+ # Pickle support
+ if isinstance(year, str):
+ try:
+ year = year.encode('latin1')
+ except UnicodeEncodeError:
+ # More informative error message.
+ raise ValueError(
+ "Failed to encode latin1 string when unpickling "
+ "a date object. "
+ "pickle.load(data, encoding='latin1') is assumed.")
+ self = object.__new__(cls)
+ self.__setstate(year)
+ self._hashcode = -1
+ return self
+ year, month, day = _check_date_fields(year, month, day)
+ self = object.__new__(cls)
+ self._year = year
+ self._month = month
+ self._day = day
+ self._hashcode = -1
+ return self
+
+ # Additional constructors
+
+ @classmethod
+ def fromtimestamp(cls, t):
+ "Construct a date from a POSIX timestamp (like time.time())."
+ y, m, d, hh, mm, ss, weekday, jday, dst = _time.localtime(t)
+ return cls(y, m, d)
+
+ @classmethod
+ def today(cls):
+ "Construct a date from time.time()."
+ t = _time.time()
+ return cls.fromtimestamp(t)
+
+ @classmethod
+ def fromordinal(cls, n):
+ """Construct a date from a proleptic Gregorian ordinal.
+
+ January 1 of year 1 is day 1. Only the year, month and day are
+ non-zero in the result.
+ """
+ y, m, d = _ord2ymd(n)
+ return cls(y, m, d)
+
+ @classmethod
+ def fromisoformat(cls, date_string):
+ """Construct a date from the output of date.isoformat()."""
+ if not isinstance(date_string, str):
+ raise TypeError('fromisoformat: argument must be str')
+
+ try:
+ assert len(date_string) == 10
+ return cls(*_parse_isoformat_date(date_string))
+ except Exception:
+ raise ValueError(f'Invalid isoformat string: {date_string!r}')
+
+ @classmethod
+ def fromisocalendar(cls, year, week, day):
+ """Construct a date from the ISO year, week number and weekday.
+
+ This is the inverse of the date.isocalendar() function"""
+ # Year is bounded this way because 9999-12-31 is (9999, 52, 5)
+ if not MINYEAR <= year <= MAXYEAR:
+ raise ValueError(f"Year is out of range: {year}")
+
+ if not 0 < week < 53:
+ out_of_range = True
+
+ if week == 53:
+ # ISO years have 53 weeks in them on years starting with a
+ # Thursday and leap years starting on a Wednesday
+ first_weekday = _ymd2ord(year, 1, 1) % 7
+ if (first_weekday == 4 or (first_weekday == 3 and
+ _is_leap(year))):
+ out_of_range = False
+
+ if out_of_range:
+ raise ValueError(f"Invalid week: {week}")
+
+ if not 0 < day < 8:
+ raise ValueError(f"Invalid weekday: {day} (range is [1, 7])")
+
+ # Now compute the offset from (Y, 1, 1) in days:
+ day_offset = (week - 1) * 7 + (day - 1)
+
+ # Calculate the ordinal day for monday, week 1
+ day_1 = _isoweek1monday(year)
+ ord_day = day_1 + day_offset
+
+ return cls(*_ord2ymd(ord_day))
+
+ # Conversions to string
+
+ def __repr__(self):
+ """Convert to formal string, for repr().
+
+ >>> dt = datetime(2010, 1, 1)
+ >>> repr(dt)
+ 'datetime.datetime(2010, 1, 1, 0, 0)'
+
+ >>> dt = datetime(2010, 1, 1, tzinfo=timezone.utc)
+ >>> repr(dt)
+ 'datetime.datetime(2010, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)'
+ """
+ return "%s.%s(%d, %d, %d)" % (self.__class__.__module__,
+ self.__class__.__qualname__,
+ self._year,
+ self._month,
+ self._day)
+ # XXX These shouldn't depend on time.localtime(), because that
+ # clips the usable dates to [1970 .. 2038). At least ctime() is
+ # easily done without using strftime() -- that's better too because
+ # strftime("%c", ...) is locale specific.
+
+
+ def ctime(self):
+ "Return ctime() style string."
+ weekday = self.toordinal() % 7 or 7
+ return "%s %s %2d 00:00:00 %04d" % (
+ _DAYNAMES[weekday],
+ _MONTHNAMES[self._month],
+ self._day, self._year)
+
+ def strftime(self, fmt):
+ "Format using strftime()."
+ return _wrap_strftime(self, fmt, self.timetuple())
+
+ def __format__(self, fmt):
+ if not isinstance(fmt, str):
+ raise TypeError("must be str, not %s" % type(fmt).__name__)
+ if len(fmt) != 0:
+ return self.strftime(fmt)
+ return str(self)
+
+ def isoformat(self):
+ """Return the date formatted according to ISO.
+
+ This is 'YYYY-MM-DD'.
+
+ References:
+ - http://www.w3.org/TR/NOTE-datetime
+ - http://www.cl.cam.ac.uk/~mgk25/iso-time.html
+ """
+ return "%04d-%02d-%02d" % (self._year, self._month, self._day)
+
+ __str__ = isoformat
+
+ # Read-only field accessors
+ @property
+ def year(self):
+ """year (1-9999)"""
+ return self._year
+
+ @property
+ def month(self):
+ """month (1-12)"""
+ return self._month
+
+ @property
+ def day(self):
+ """day (1-31)"""
+ return self._day
+
+ # Standard conversions, __eq__, __le__, __lt__, __ge__, __gt__,
+ # __hash__ (and helpers)
+
+ def timetuple(self):
+ "Return local time tuple compatible with time.localtime()."
+ return _build_struct_time(self._year, self._month, self._day,
+ 0, 0, 0, -1)
+
+ def toordinal(self):
+ """Return proleptic Gregorian ordinal for the year, month and day.
+
+ January 1 of year 1 is day 1. Only the year, month and day values
+ contribute to the result.
+ """
+ return _ymd2ord(self._year, self._month, self._day)
+
+ def replace(self, year=None, month=None, day=None):
+ """Return a new date with new values for the specified fields."""
+ if year is None:
+ year = self._year
+ if month is None:
+ month = self._month
+ if day is None:
+ day = self._day
+ return type(self)(year, month, day)
+
+ # Comparisons of date objects with other.
+
+ def __eq__(self, other):
+ if isinstance(other, date):
+ return self._cmp(other) == 0
+ return NotImplemented
+
+ def __le__(self, other):
+ if isinstance(other, date):
+ return self._cmp(other) <= 0
+ return NotImplemented
+
+ def __lt__(self, other):
+ if isinstance(other, date):
+ return self._cmp(other) < 0
+ return NotImplemented
+
+ def __ge__(self, other):
+ if isinstance(other, date):
+ return self._cmp(other) >= 0
+ return NotImplemented
+
+ def __gt__(self, other):
+ if isinstance(other, date):
+ return self._cmp(other) > 0
+ return NotImplemented
+
+ def _cmp(self, other):
+ assert isinstance(other, date)
+ y, m, d = self._year, self._month, self._day
+ y2, m2, d2 = other._year, other._month, other._day
+ return _cmp((y, m, d), (y2, m2, d2))
+
+ def __hash__(self):
+ "Hash."
+ if self._hashcode == -1:
+ self._hashcode = hash(self._getstate())
+ return self._hashcode
+
+ # Computations
+
+ def __add__(self, other):
+ "Add a date to a timedelta."
+ if isinstance(other, timedelta):
+ o = self.toordinal() + other.days
+ if 0 < o <= _MAXORDINAL:
+ return type(self).fromordinal(o)
+ raise OverflowError("result out of range")
+ return NotImplemented
+
+ __radd__ = __add__
+
+ def __sub__(self, other):
+ """Subtract two dates, or a date and a timedelta."""
+ if isinstance(other, timedelta):
+ return self + timedelta(-other.days)
+ if isinstance(other, date):
+ days1 = self.toordinal()
+ days2 = other.toordinal()
+ return timedelta(days1 - days2)
+ return NotImplemented
+
+ def weekday(self):
+ "Return day of the week, where Monday == 0 ... Sunday == 6."
+ return (self.toordinal() + 6) % 7
+
+ # Day-of-the-week and week-of-the-year, according to ISO
+
+ def isoweekday(self):
+ "Return day of the week, where Monday == 1 ... Sunday == 7."
+ # 1-Jan-0001 is a Monday
+ return self.toordinal() % 7 or 7
+
+ def isocalendar(self):
+ """Return a 3-tuple containing ISO year, week number, and weekday.
+
+ The first ISO week of the year is the (Mon-Sun) week
+ containing the year's first Thursday; everything else derives
+ from that.
+
+ The first week is 1; Monday is 1 ... Sunday is 7.
+
+ ISO calendar algorithm taken from
+ http://www.phys.uu.nl/~vgent/calendar/isocalendar.htm
+ (used with permission)
+ """
+ year = self._year
+ week1monday = _isoweek1monday(year)
+ today = _ymd2ord(self._year, self._month, self._day)
+ # Internally, week and day have origin 0
+ week, day = divmod(today - week1monday, 7)
+ if week < 0:
+ year -= 1
+ week1monday = _isoweek1monday(year)
+ week, day = divmod(today - week1monday, 7)
+ elif week >= 52:
+ if today >= _isoweek1monday(year+1):
+ year += 1
+ week = 0
+ return year, week+1, day+1
+
+ # Pickle support.
+
+ def _getstate(self):
+ yhi, ylo = divmod(self._year, 256)
+ return bytes([yhi, ylo, self._month, self._day]),
+
+ def __setstate(self, string):
+ yhi, ylo, self._month, self._day = string
+ self._year = yhi * 256 + ylo
+
+ def __reduce__(self):
+ return (self.__class__, self._getstate())
+
+_date_class = date # so functions w/ args named "date" can get at the class
+
+date.min = date(1, 1, 1)
+date.max = date(9999, 12, 31)
+date.resolution = timedelta(days=1)
+
+
+class tzinfo:
+ """Abstract base class for time zone info classes.
+
+ Subclasses must override the name(), utcoffset() and dst() methods.
+ """
+ __slots__ = ()
+
+ def tzname(self, dt):
+ "datetime -> string name of time zone."
+ raise NotImplementedError("tzinfo subclass must override tzname()")
+
+ def utcoffset(self, dt):
+ "datetime -> timedelta, positive for east of UTC, negative for west of UTC"
+ raise NotImplementedError("tzinfo subclass must override utcoffset()")
+
+ def dst(self, dt):
+ """datetime -> DST offset as timedelta, positive for east of UTC.
+
+ Return 0 if DST not in effect. utcoffset() must include the DST
+ offset.
+ """
+ raise NotImplementedError("tzinfo subclass must override dst()")
+
+ def fromutc(self, dt):
+ "datetime in UTC -> datetime in local time."
+
+ if not isinstance(dt, datetime):
+ raise TypeError("fromutc() requires a datetime argument")
+ if dt.tzinfo is not self:
+ raise ValueError("dt.tzinfo is not self")
+
+ dtoff = dt.utcoffset()
+ if dtoff is None:
+ raise ValueError("fromutc() requires a non-None utcoffset() "
+ "result")
+
+ # See the long comment block at the end of this file for an
+ # explanation of this algorithm.
+ dtdst = dt.dst()
+ if dtdst is None:
+ raise ValueError("fromutc() requires a non-None dst() result")
+ delta = dtoff - dtdst
+ if delta:
+ dt += delta
+ dtdst = dt.dst()
+ if dtdst is None:
+ raise ValueError("fromutc(): dt.dst gave inconsistent "
+ "results; cannot convert")
+ return dt + dtdst
+
+ # Pickle support.
+
+ def __reduce__(self):
+ getinitargs = getattr(self, "__getinitargs__", None)
+ if getinitargs:
+ args = getinitargs()
+ else:
+ args = ()
+ getstate = getattr(self, "__getstate__", None)
+ if getstate:
+ state = getstate()
+ else:
+ state = getattr(self, "__dict__", None) or None
+ if state is None:
+ return (self.__class__, args)
+ else:
+ return (self.__class__, args, state)
+
+_tzinfo_class = tzinfo
+
+class time:
+ """Time with time zone.
+
+ Constructors:
+
+ __new__()
+
+ Operators:
+
+ __repr__, __str__
+ __eq__, __le__, __lt__, __ge__, __gt__, __hash__
+
+ Methods:
+
+ strftime()
+ isoformat()
+ utcoffset()
+ tzname()
+ dst()
+
+ Properties (readonly):
+ hour, minute, second, microsecond, tzinfo, fold
+ """
+ __slots__ = '_hour', '_minute', '_second', '_microsecond', '_tzinfo', '_hashcode', '_fold'
+
+ def __new__(cls, hour=0, minute=0, second=0, microsecond=0, tzinfo=None, *, fold=0):
+ """Constructor.
+
+ Arguments:
+
+ hour, minute (required)
+ second, microsecond (default to zero)
+ tzinfo (default to None)
+ fold (keyword only, default to zero)
+ """
+ if (isinstance(hour, (bytes, str)) and len(hour) == 6 and
+ ord(hour[0:1])&0x7F < 24):
+ # Pickle support
+ if isinstance(hour, str):
+ try:
+ hour = hour.encode('latin1')
+ except UnicodeEncodeError:
+ # More informative error message.
+ raise ValueError(
+ "Failed to encode latin1 string when unpickling "
+ "a time object. "
+ "pickle.load(data, encoding='latin1') is assumed.")
+ self = object.__new__(cls)
+ self.__setstate(hour, minute or None)
+ self._hashcode = -1
+ return self
+ hour, minute, second, microsecond, fold = _check_time_fields(
+ hour, minute, second, microsecond, fold)
+ _check_tzinfo_arg(tzinfo)
+ self = object.__new__(cls)
+ self._hour = hour
+ self._minute = minute
+ self._second = second
+ self._microsecond = microsecond
+ self._tzinfo = tzinfo
+ self._hashcode = -1
+ self._fold = fold
+ return self
+
+ # Read-only field accessors
+ @property
+ def hour(self):
+ """hour (0-23)"""
+ return self._hour
+
+ @property
+ def minute(self):
+ """minute (0-59)"""
+ return self._minute
+
+ @property
+ def second(self):
+ """second (0-59)"""
+ return self._second
+
+ @property
+ def microsecond(self):
+ """microsecond (0-999999)"""
+ return self._microsecond
+
+ @property
+ def tzinfo(self):
+ """timezone info object"""
+ return self._tzinfo
+
+ @property
+ def fold(self):
+ return self._fold
+
+ # Standard conversions, __hash__ (and helpers)
+
+ # Comparisons of time objects with other.
+
+ def __eq__(self, other):
+ if isinstance(other, time):
+ return self._cmp(other, allow_mixed=True) == 0
+ else:
+ return NotImplemented
+
+ def __le__(self, other):
+ if isinstance(other, time):
+ return self._cmp(other) <= 0
+ else:
+ return NotImplemented
+
+ def __lt__(self, other):
+ if isinstance(other, time):
+ return self._cmp(other) < 0
+ else:
+ return NotImplemented
+
+ def __ge__(self, other):
+ if isinstance(other, time):
+ return self._cmp(other) >= 0
+ else:
+ return NotImplemented
+
+ def __gt__(self, other):
+ if isinstance(other, time):
+ return self._cmp(other) > 0
+ else:
+ return NotImplemented
+
+ def _cmp(self, other, allow_mixed=False):
+ assert isinstance(other, time)
+ mytz = self._tzinfo
+ ottz = other._tzinfo
+ myoff = otoff = None
+
+ if mytz is ottz:
+ base_compare = True
+ else:
+ myoff = self.utcoffset()
+ otoff = other.utcoffset()
+ base_compare = myoff == otoff
+
+ if base_compare:
+ return _cmp((self._hour, self._minute, self._second,
+ self._microsecond),
+ (other._hour, other._minute, other._second,
+ other._microsecond))
+ if myoff is None or otoff is None:
+ if allow_mixed:
+ return 2 # arbitrary non-zero value
+ else:
+ raise TypeError("cannot compare naive and aware times")
+ myhhmm = self._hour * 60 + self._minute - myoff//timedelta(minutes=1)
+ othhmm = other._hour * 60 + other._minute - otoff//timedelta(minutes=1)
+ return _cmp((myhhmm, self._second, self._microsecond),
+ (othhmm, other._second, other._microsecond))
+
+ def __hash__(self):
+ """Hash."""
+ if self._hashcode == -1:
+ if self.fold:
+ t = self.replace(fold=0)
+ else:
+ t = self
+ tzoff = t.utcoffset()
+ if not tzoff: # zero or None
+ self._hashcode = hash(t._getstate()[0])
+ else:
+ h, m = divmod(timedelta(hours=self.hour, minutes=self.minute) - tzoff,
+ timedelta(hours=1))
+ assert not m % timedelta(minutes=1), "whole minute"
+ m //= timedelta(minutes=1)
+ if 0 <= h < 24:
+ self._hashcode = hash(time(h, m, self.second, self.microsecond))
+ else:
+ self._hashcode = hash((h, m, self.second, self.microsecond))
+ return self._hashcode
+
+ # Conversion to string
+
+ def _tzstr(self):
+ """Return formatted timezone offset (+xx:xx) or an empty string."""
+ off = self.utcoffset()
+ return _format_offset(off)
+
+ def __repr__(self):
+ """Convert to formal string, for repr()."""
+ if self._microsecond != 0:
+ s = ", %d, %d" % (self._second, self._microsecond)
+ elif self._second != 0:
+ s = ", %d" % self._second
+ else:
+ s = ""
+ s= "%s.%s(%d, %d%s)" % (self.__class__.__module__,
+ self.__class__.__qualname__,
+ self._hour, self._minute, s)
+ if self._tzinfo is not None:
+ assert s[-1:] == ")"
+ s = s[:-1] + ", tzinfo=%r" % self._tzinfo + ")"
+ if self._fold:
+ assert s[-1:] == ")"
+ s = s[:-1] + ", fold=1)"
+ return s
+
+ def isoformat(self, timespec='auto'):
+ """Return the time formatted according to ISO.
+
+ The full format is 'HH:MM:SS.mmmmmm+zz:zz'. By default, the fractional
+ part is omitted if self.microsecond == 0.
+
+ The optional argument timespec specifies the number of additional
+ terms of the time to include.
+ """
+ s = _format_time(self._hour, self._minute, self._second,
+ self._microsecond, timespec)
+ tz = self._tzstr()
+ if tz:
+ s += tz
+ return s
+
+ __str__ = isoformat
+
+ @classmethod
+ def fromisoformat(cls, time_string):
+ """Construct a time from the output of isoformat()."""
+ if not isinstance(time_string, str):
+ raise TypeError('fromisoformat: argument must be str')
+
+ try:
+ return cls(*_parse_isoformat_time(time_string))
+ except Exception:
+ raise ValueError(f'Invalid isoformat string: {time_string!r}')
+
+
+ def strftime(self, fmt):
+ """Format using strftime(). The date part of the timestamp passed
+ to underlying strftime should not be used.
+ """
+ # The year must be >= 1000 else Python's strftime implementation
+ # can raise a bogus exception.
+ timetuple = (1900, 1, 1,
+ self._hour, self._minute, self._second,
+ 0, 1, -1)
+ return _wrap_strftime(self, fmt, timetuple)
+
+ def __format__(self, fmt):
+ if not isinstance(fmt, str):
+ raise TypeError("must be str, not %s" % type(fmt).__name__)
+ if len(fmt) != 0:
+ return self.strftime(fmt)
+ return str(self)
+
+ # Timezone functions
+
+ def utcoffset(self):
+ """Return the timezone offset as timedelta, positive east of UTC
+ (negative west of UTC)."""
+ if self._tzinfo is None:
+ return None
+ offset = self._tzinfo.utcoffset(None)
+ _check_utc_offset("utcoffset", offset)
+ return offset
+
+ def tzname(self):
+ """Return the timezone name.
+
+ Note that the name is 100% informational -- there's no requirement that
+ it mean anything in particular. For example, "GMT", "UTC", "-500",
+ "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies.
+ """
+ if self._tzinfo is None:
+ return None
+ name = self._tzinfo.tzname(None)
+ _check_tzname(name)
+ return name
+
+ def dst(self):
+ """Return 0 if DST is not in effect, or the DST offset (as timedelta
+ positive eastward) if DST is in effect.
+
+ This is purely informational; the DST offset has already been added to
+ the UTC offset returned by utcoffset() if applicable, so there's no
+ need to consult dst() unless you're interested in displaying the DST
+ info.
+ """
+ if self._tzinfo is None:
+ return None
+ offset = self._tzinfo.dst(None)
+ _check_utc_offset("dst", offset)
+ return offset
+
+ def replace(self, hour=None, minute=None, second=None, microsecond=None,
+ tzinfo=True, *, fold=None):
+ """Return a new time with new values for the specified fields."""
+ if hour is None:
+ hour = self.hour
+ if minute is None:
+ minute = self.minute
+ if second is None:
+ second = self.second
+ if microsecond is None:
+ microsecond = self.microsecond
+ if tzinfo is True:
+ tzinfo = self.tzinfo
+ if fold is None:
+ fold = self._fold
+ return type(self)(hour, minute, second, microsecond, tzinfo, fold=fold)
+
+ # Pickle support.
+
+ def _getstate(self, protocol=3):
+ us2, us3 = divmod(self._microsecond, 256)
+ us1, us2 = divmod(us2, 256)
+ h = self._hour
+ if self._fold and protocol > 3:
+ h += 128
+ basestate = bytes([h, self._minute, self._second,
+ us1, us2, us3])
+ if self._tzinfo is None:
+ return (basestate,)
+ else:
+ return (basestate, self._tzinfo)
+
+ def __setstate(self, string, tzinfo):
+ if tzinfo is not None and not isinstance(tzinfo, _tzinfo_class):
+ raise TypeError("bad tzinfo state arg")
+ h, self._minute, self._second, us1, us2, us3 = string
+ if h > 127:
+ self._fold = 1
+ self._hour = h - 128
+ else:
+ self._fold = 0
+ self._hour = h
+ self._microsecond = (((us1 << 8) | us2) << 8) | us3
+ self._tzinfo = tzinfo
+
+ def __reduce_ex__(self, protocol):
+ return (time, self._getstate(protocol))
+
+ def __reduce__(self):
+ return self.__reduce_ex__(2)
+
+_time_class = time # so functions w/ args named "time" can get at the class
+
+time.min = time(0, 0, 0)
+time.max = time(23, 59, 59, 999999)
+time.resolution = timedelta(microseconds=1)
+
+class datetime(date):
+ """datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]])
+
+ The year, month and day arguments are required. tzinfo may be None, or an
+ instance of a tzinfo subclass. The remaining arguments may be ints.
+ """
+ __slots__ = date.__slots__ + time.__slots__
+
+ def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0,
+ microsecond=0, tzinfo=None, *, fold=0):
+ if (isinstance(year, (bytes, str)) and len(year) == 10 and
+ 1 <= ord(year[2:3])&0x7F <= 12):
+ # Pickle support
+ if isinstance(year, str):
+ try:
+ year = bytes(year, 'latin1')
+ except UnicodeEncodeError:
+ # More informative error message.
+ raise ValueError(
+ "Failed to encode latin1 string when unpickling "
+ "a datetime object. "
+ "pickle.load(data, encoding='latin1') is assumed.")
+ self = object.__new__(cls)
+ self.__setstate(year, month)
+ self._hashcode = -1
+ return self
+ year, month, day = _check_date_fields(year, month, day)
+ hour, minute, second, microsecond, fold = _check_time_fields(
+ hour, minute, second, microsecond, fold)
+ _check_tzinfo_arg(tzinfo)
+ self = object.__new__(cls)
+ self._year = year
+ self._month = month
+ self._day = day
+ self._hour = hour
+ self._minute = minute
+ self._second = second
+ self._microsecond = microsecond
+ self._tzinfo = tzinfo
+ self._hashcode = -1
+ self._fold = fold
+ return self
+
+ # Read-only field accessors
+ @property
+ def hour(self):
+ """hour (0-23)"""
+ return self._hour
+
+ @property
+ def minute(self):
+ """minute (0-59)"""
+ return self._minute
+
+ @property
+ def second(self):
+ """second (0-59)"""
+ return self._second
+
+ @property
+ def microsecond(self):
+ """microsecond (0-999999)"""
+ return self._microsecond
+
+ @property
+ def tzinfo(self):
+ """timezone info object"""
+ return self._tzinfo
+
+ @property
+ def fold(self):
+ return self._fold
+
+ @classmethod
+ def _fromtimestamp(cls, t, utc, tz):
+ """Construct a datetime from a POSIX timestamp (like time.time()).
+
+ A timezone info object may be passed in as well.
+ """
+ frac, t = _math.modf(t)
+ us = round(frac * 1e6)
+ if us >= 1000000:
+ t += 1
+ us -= 1000000
+ elif us < 0:
+ t -= 1
+ us += 1000000
+
+ converter = _time.gmtime if utc else _time.localtime
+ y, m, d, hh, mm, ss, weekday, jday, dst = converter(t)
+ ss = min(ss, 59) # clamp out leap seconds if the platform has them
+ result = cls(y, m, d, hh, mm, ss, us, tz)
+ if tz is None:
+ # As of version 2015f max fold in IANA database is
+ # 23 hours at 1969-09-30 13:00:00 in Kwajalein.
+ # Let's probe 24 hours in the past to detect a transition:
+ max_fold_seconds = 24 * 3600
+
+ # On Windows localtime_s throws an OSError for negative values,
+ # thus we can't perform fold detection for values of time less
+ # than the max time fold. See comments in _datetimemodule's
+ # version of this method for more details.
+ if t < max_fold_seconds and sys.platform.startswith("win"):
+ return result
+
+ y, m, d, hh, mm, ss = converter(t - max_fold_seconds)[:6]
+ probe1 = cls(y, m, d, hh, mm, ss, us, tz)
+ trans = result - probe1 - timedelta(0, max_fold_seconds)
+ if trans.days < 0:
+ y, m, d, hh, mm, ss = converter(t + trans // timedelta(0, 1))[:6]
+ probe2 = cls(y, m, d, hh, mm, ss, us, tz)
+ if probe2 == result:
+ result._fold = 1
+ else:
+ result = tz.fromutc(result)
+ return result
+
+ @classmethod
+ def fromtimestamp(cls, t, tz=None):
+ """Construct a datetime from a POSIX timestamp (like time.time()).
+
+ A timezone info object may be passed in as well.
+ """
+ _check_tzinfo_arg(tz)
+
+ return cls._fromtimestamp(t, tz is not None, tz)
+
+ @classmethod
+ def utcfromtimestamp(cls, t):
+ """Construct a naive UTC datetime from a POSIX timestamp."""
+ return cls._fromtimestamp(t, True, None)
+
+ @classmethod
+ def now(cls, tz=None):
+ "Construct a datetime from time.time() and optional time zone info."
+ t = _time.time()
+ return cls.fromtimestamp(t, tz)
+
+ @classmethod
+ def utcnow(cls):
+ "Construct a UTC datetime from time.time()."
+ t = _time.time()
+ return cls.utcfromtimestamp(t)
+
+ @classmethod
+ def combine(cls, date, time, tzinfo=True):
+ "Construct a datetime from a given date and a given time."
+ if not isinstance(date, _date_class):
+ raise TypeError("date argument must be a date instance")
+ if not isinstance(time, _time_class):
+ raise TypeError("time argument must be a time instance")
+ if tzinfo is True:
+ tzinfo = time.tzinfo
+ return cls(date.year, date.month, date.day,
+ time.hour, time.minute, time.second, time.microsecond,
+ tzinfo, fold=time.fold)
+
+ @classmethod
+ def fromisoformat(cls, date_string):
+ """Construct a datetime from the output of datetime.isoformat()."""
+ if not isinstance(date_string, str):
+ raise TypeError('fromisoformat: argument must be str')
+
+ # Split this at the separator
+ dstr = date_string[0:10]
+ tstr = date_string[11:]
+
+ try:
+ date_components = _parse_isoformat_date(dstr)
+ except ValueError:
+ raise ValueError(f'Invalid isoformat string: {date_string!r}')
+
+ if tstr:
+ try:
+ time_components = _parse_isoformat_time(tstr)
+ except ValueError:
+ raise ValueError(f'Invalid isoformat string: {date_string!r}')
+ else:
+ time_components = [0, 0, 0, 0, None]
+
+ return cls(*(date_components + time_components))
+
+ def timetuple(self):
+ "Return local time tuple compatible with time.localtime()."
+ dst = self.dst()
+ if dst is None:
+ dst = -1
+ elif dst:
+ dst = 1
+ else:
+ dst = 0
+ return _build_struct_time(self.year, self.month, self.day,
+ self.hour, self.minute, self.second,
+ dst)
+
+ def _mktime(self):
+ """Return integer POSIX timestamp."""
+ epoch = datetime(1970, 1, 1)
+ max_fold_seconds = 24 * 3600
+ t = (self - epoch) // timedelta(0, 1)
+ def local(u):
+ y, m, d, hh, mm, ss = _time.localtime(u)[:6]
+ return (datetime(y, m, d, hh, mm, ss) - epoch) // timedelta(0, 1)
+
+ # Our goal is to solve t = local(u) for u.
+ a = local(t) - t
+ u1 = t - a
+ t1 = local(u1)
+ if t1 == t:
+ # We found one solution, but it may not be the one we need.
+ # Look for an earlier solution (if `fold` is 0), or a
+ # later one (if `fold` is 1).
+ u2 = u1 + (-max_fold_seconds, max_fold_seconds)[self.fold]
+ b = local(u2) - u2
+ if a == b:
+ return u1
+ else:
+ b = t1 - u1
+ assert a != b
+ u2 = t - b
+ t2 = local(u2)
+ if t2 == t:
+ return u2
+ if t1 == t:
+ return u1
+ # We have found both offsets a and b, but neither t - a nor t - b is
+ # a solution. This means t is in the gap.
+ return (max, min)[self.fold](u1, u2)
+
+
+ def timestamp(self):
+ "Return POSIX timestamp as float"
+ if self._tzinfo is None:
+ s = self._mktime()
+ return s + self.microsecond / 1e6
+ else:
+ return (self - _EPOCH).total_seconds()
+
+ def utctimetuple(self):
+ "Return UTC time tuple compatible with time.gmtime()."
+ offset = self.utcoffset()
+ if offset:
+ self -= offset
+ y, m, d = self.year, self.month, self.day
+ hh, mm, ss = self.hour, self.minute, self.second
+ return _build_struct_time(y, m, d, hh, mm, ss, 0)
+
+ def date(self):
+ "Return the date part."
+ return date(self._year, self._month, self._day)
+
+ def time(self):
+ "Return the time part, with tzinfo None."
+ return time(self.hour, self.minute, self.second, self.microsecond, fold=self.fold)
+
+ def timetz(self):
+ "Return the time part, with same tzinfo."
+ return time(self.hour, self.minute, self.second, self.microsecond,
+ self._tzinfo, fold=self.fold)
+
+ def replace(self, year=None, month=None, day=None, hour=None,
+ minute=None, second=None, microsecond=None, tzinfo=True,
+ *, fold=None):
+ """Return a new datetime with new values for the specified fields."""
+ if year is None:
+ year = self.year
+ if month is None:
+ month = self.month
+ if day is None:
+ day = self.day
+ if hour is None:
+ hour = self.hour
+ if minute is None:
+ minute = self.minute
+ if second is None:
+ second = self.second
+ if microsecond is None:
+ microsecond = self.microsecond
+ if tzinfo is True:
+ tzinfo = self.tzinfo
+ if fold is None:
+ fold = self.fold
+ return type(self)(year, month, day, hour, minute, second,
+ microsecond, tzinfo, fold=fold)
+
+ def _local_timezone(self):
+ if self.tzinfo is None:
+ ts = self._mktime()
+ else:
+ ts = (self - _EPOCH) // timedelta(seconds=1)
+ localtm = _time.localtime(ts)
+ local = datetime(*localtm[:6])
+ # Extract TZ data
+ gmtoff = localtm.tm_gmtoff
+ zone = localtm.tm_zone
+ return timezone(timedelta(seconds=gmtoff), zone)
+
+ def astimezone(self, tz=None):
+ if tz is None:
+ tz = self._local_timezone()
+ elif not isinstance(tz, tzinfo):
+ raise TypeError("tz argument must be an instance of tzinfo")
+
+ mytz = self.tzinfo
+ if mytz is None:
+ mytz = self._local_timezone()
+ myoffset = mytz.utcoffset(self)
+ else:
+ myoffset = mytz.utcoffset(self)
+ if myoffset is None:
+ mytz = self.replace(tzinfo=None)._local_timezone()
+ myoffset = mytz.utcoffset(self)
+
+ if tz is mytz:
+ return self
+
+ # Convert self to UTC, and attach the new time zone object.
+ utc = (self - myoffset).replace(tzinfo=tz)
+
+ # Convert from UTC to tz's local time.
+ return tz.fromutc(utc)
+
+ # Ways to produce a string.
+
+ def ctime(self):
+ "Return ctime() style string."
+ weekday = self.toordinal() % 7 or 7
+ return "%s %s %2d %02d:%02d:%02d %04d" % (
+ _DAYNAMES[weekday],
+ _MONTHNAMES[self._month],
+ self._day,
+ self._hour, self._minute, self._second,
+ self._year)
+
+ def isoformat(self, sep='T', timespec='auto'):
+ """Return the time formatted according to ISO.
+
+ The full format looks like 'YYYY-MM-DD HH:MM:SS.mmmmmm'.
+ By default, the fractional part is omitted if self.microsecond == 0.
+
+ If self.tzinfo is not None, the UTC offset is also attached, giving
+ giving a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'.
+
+ Optional argument sep specifies the separator between date and
+ time, default 'T'.
+
+ The optional argument timespec specifies the number of additional
+ terms of the time to include.
+ """
+ s = ("%04d-%02d-%02d%c" % (self._year, self._month, self._day, sep) +
+ _format_time(self._hour, self._minute, self._second,
+ self._microsecond, timespec))
+
+ off = self.utcoffset()
+ tz = _format_offset(off)
+ if tz:
+ s += tz
+
+ return s
+
+ def __repr__(self):
+ """Convert to formal string, for repr()."""
+ L = [self._year, self._month, self._day, # These are never zero
+ self._hour, self._minute, self._second, self._microsecond]
+ if L[-1] == 0:
+ del L[-1]
+ if L[-1] == 0:
+ del L[-1]
+ s = "%s.%s(%s)" % (self.__class__.__module__,
+ self.__class__.__qualname__,
+ ", ".join(map(str, L)))
+ if self._tzinfo is not None:
+ assert s[-1:] == ")"
+ s = s[:-1] + ", tzinfo=%r" % self._tzinfo + ")"
+ if self._fold:
+ assert s[-1:] == ")"
+ s = s[:-1] + ", fold=1)"
+ return s
+
+ def __str__(self):
+ "Convert to string, for str()."
+ return self.isoformat(sep=' ')
+
+ @classmethod
+ def strptime(cls, date_string, format):
+ 'string, format -> new datetime parsed from a string (like time.strptime()).'
+ import _strptime
+ return _strptime._strptime_datetime(cls, date_string, format)
+
+ def utcoffset(self):
+ """Return the timezone offset as timedelta positive east of UTC (negative west of
+ UTC)."""
+ if self._tzinfo is None:
+ return None
+ offset = self._tzinfo.utcoffset(self)
+ _check_utc_offset("utcoffset", offset)
+ return offset
+
+ def tzname(self):
+ """Return the timezone name.
+
+ Note that the name is 100% informational -- there's no requirement that
+ it mean anything in particular. For example, "GMT", "UTC", "-500",
+ "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies.
+ """
+ if self._tzinfo is None:
+ return None
+ name = self._tzinfo.tzname(self)
+ _check_tzname(name)
+ return name
+
+ def dst(self):
+ """Return 0 if DST is not in effect, or the DST offset (as timedelta
+ positive eastward) if DST is in effect.
+
+ This is purely informational; the DST offset has already been added to
+ the UTC offset returned by utcoffset() if applicable, so there's no
+ need to consult dst() unless you're interested in displaying the DST
+ info.
+ """
+ if self._tzinfo is None:
+ return None
+ offset = self._tzinfo.dst(self)
+ _check_utc_offset("dst", offset)
+ return offset
+
+ # Comparisons of datetime objects with other.
+
+ def __eq__(self, other):
+ if isinstance(other, datetime):
+ return self._cmp(other, allow_mixed=True) == 0
+ elif not isinstance(other, date):
+ return NotImplemented
+ else:
+ return False
+
+ def __le__(self, other):
+ if isinstance(other, datetime):
+ return self._cmp(other) <= 0
+ elif not isinstance(other, date):
+ return NotImplemented
+ else:
+ _cmperror(self, other)
+
+ def __lt__(self, other):
+ if isinstance(other, datetime):
+ return self._cmp(other) < 0
+ elif not isinstance(other, date):
+ return NotImplemented
+ else:
+ _cmperror(self, other)
+
+ def __ge__(self, other):
+ if isinstance(other, datetime):
+ return self._cmp(other) >= 0
+ elif not isinstance(other, date):
+ return NotImplemented
+ else:
+ _cmperror(self, other)
+
+ def __gt__(self, other):
+ if isinstance(other, datetime):
+ return self._cmp(other) > 0
+ elif not isinstance(other, date):
+ return NotImplemented
+ else:
+ _cmperror(self, other)
+
+ def _cmp(self, other, allow_mixed=False):
+ assert isinstance(other, datetime)
+ mytz = self._tzinfo
+ ottz = other._tzinfo
+ myoff = otoff = None
+
+ if mytz is ottz:
+ base_compare = True
+ else:
+ myoff = self.utcoffset()
+ otoff = other.utcoffset()
+ # Assume that allow_mixed means that we are called from __eq__
+ if allow_mixed:
+ if myoff != self.replace(fold=not self.fold).utcoffset():
+ return 2
+ if otoff != other.replace(fold=not other.fold).utcoffset():
+ return 2
+ base_compare = myoff == otoff
+
+ if base_compare:
+ return _cmp((self._year, self._month, self._day,
+ self._hour, self._minute, self._second,
+ self._microsecond),
+ (other._year, other._month, other._day,
+ other._hour, other._minute, other._second,
+ other._microsecond))
+ if myoff is None or otoff is None:
+ if allow_mixed:
+ return 2 # arbitrary non-zero value
+ else:
+ raise TypeError("cannot compare naive and aware datetimes")
+ # XXX What follows could be done more efficiently...
+ diff = self - other # this will take offsets into account
+ if diff.days < 0:
+ return -1
+ return diff and 1 or 0
+
+ def __add__(self, other):
+ "Add a datetime and a timedelta."
+ if not isinstance(other, timedelta):
+ return NotImplemented
+ delta = timedelta(self.toordinal(),
+ hours=self._hour,
+ minutes=self._minute,
+ seconds=self._second,
+ microseconds=self._microsecond)
+ delta += other
+ hour, rem = divmod(delta.seconds, 3600)
+ minute, second = divmod(rem, 60)
+ if 0 < delta.days <= _MAXORDINAL:
+ return type(self).combine(date.fromordinal(delta.days),
+ time(hour, minute, second,
+ delta.microseconds,
+ tzinfo=self._tzinfo))
+ raise OverflowError("result out of range")
+
+ __radd__ = __add__
+
+ def __sub__(self, other):
+ "Subtract two datetimes, or a datetime and a timedelta."
+ if not isinstance(other, datetime):
+ if isinstance(other, timedelta):
+ return self + -other
+ return NotImplemented
+
+ days1 = self.toordinal()
+ days2 = other.toordinal()
+ secs1 = self._second + self._minute * 60 + self._hour * 3600
+ secs2 = other._second + other._minute * 60 + other._hour * 3600
+ base = timedelta(days1 - days2,
+ secs1 - secs2,
+ self._microsecond - other._microsecond)
+ if self._tzinfo is other._tzinfo:
+ return base
+ myoff = self.utcoffset()
+ otoff = other.utcoffset()
+ if myoff == otoff:
+ return base
+ if myoff is None or otoff is None:
+ raise TypeError("cannot mix naive and timezone-aware time")
+ return base + otoff - myoff
+
+ def __hash__(self):
+ if self._hashcode == -1:
+ if self.fold:
+ t = self.replace(fold=0)
+ else:
+ t = self
+ tzoff = t.utcoffset()
+ if tzoff is None:
+ self._hashcode = hash(t._getstate()[0])
+ else:
+ days = _ymd2ord(self.year, self.month, self.day)
+ seconds = self.hour * 3600 + self.minute * 60 + self.second
+ self._hashcode = hash(timedelta(days, seconds, self.microsecond) - tzoff)
+ return self._hashcode
+
+ # Pickle support.
+
+ def _getstate(self, protocol=3):
+ yhi, ylo = divmod(self._year, 256)
+ us2, us3 = divmod(self._microsecond, 256)
+ us1, us2 = divmod(us2, 256)
+ m = self._month
+ if self._fold and protocol > 3:
+ m += 128
+ basestate = bytes([yhi, ylo, m, self._day,
+ self._hour, self._minute, self._second,
+ us1, us2, us3])
+ if self._tzinfo is None:
+ return (basestate,)
+ else:
+ return (basestate, self._tzinfo)
+
+ def __setstate(self, string, tzinfo):
+ if tzinfo is not None and not isinstance(tzinfo, _tzinfo_class):
+ raise TypeError("bad tzinfo state arg")
+ (yhi, ylo, m, self._day, self._hour,
+ self._minute, self._second, us1, us2, us3) = string
+ if m > 127:
+ self._fold = 1
+ self._month = m - 128
+ else:
+ self._fold = 0
+ self._month = m
+ self._year = yhi * 256 + ylo
+ self._microsecond = (((us1 << 8) | us2) << 8) | us3
+ self._tzinfo = tzinfo
+
+ def __reduce_ex__(self, protocol):
+ return (self.__class__, self._getstate(protocol))
+
+ def __reduce__(self):
+ return self.__reduce_ex__(2)
+
+
+datetime.min = datetime(1, 1, 1)
+datetime.max = datetime(9999, 12, 31, 23, 59, 59, 999999)
+datetime.resolution = timedelta(microseconds=1)
+
+
+def _isoweek1monday(year):
+ # Helper to calculate the day number of the Monday starting week 1
+ # XXX This could be done more efficiently
+ THURSDAY = 3
+ firstday = _ymd2ord(year, 1, 1)
+ firstweekday = (firstday + 6) % 7 # See weekday() above
+ week1monday = firstday - firstweekday
+ if firstweekday > THURSDAY:
+ week1monday += 7
+ return week1monday
+
+
+class timezone(tzinfo):
+ __slots__ = '_offset', '_name'
+
+ # Sentinel value to disallow None
+ _Omitted = object()
+ def __new__(cls, offset, name=_Omitted):
+ if not isinstance(offset, timedelta):
+ raise TypeError("offset must be a timedelta")
+ if name is cls._Omitted:
+ if not offset:
+ return cls.utc
+ name = None
+ elif not isinstance(name, str):
+ raise TypeError("name must be a string")
+ if not cls._minoffset <= offset <= cls._maxoffset:
+ raise ValueError("offset must be a timedelta "
+ "strictly between -timedelta(hours=24) and "
+ "timedelta(hours=24).")
+ return cls._create(offset, name)
+
+ @classmethod
+ def _create(cls, offset, name=None):
+ self = tzinfo.__new__(cls)
+ self._offset = offset
+ self._name = name
+ return self
+
+ def __getinitargs__(self):
+ """pickle support"""
+ if self._name is None:
+ return (self._offset,)
+ return (self._offset, self._name)
+
+ def __eq__(self, other):
+ if isinstance(other, timezone):
+ return self._offset == other._offset
+ return NotImplemented
+
+ def __hash__(self):
+ return hash(self._offset)
+
+ def __repr__(self):
+ """Convert to formal string, for repr().
+
+ >>> tz = timezone.utc
+ >>> repr(tz)
+ 'datetime.timezone.utc'
+ >>> tz = timezone(timedelta(hours=-5), 'EST')
+ >>> repr(tz)
+ "datetime.timezone(datetime.timedelta(-1, 68400), 'EST')"
+ """
+ if self is self.utc:
+ return 'datetime.timezone.utc'
+ if self._name is None:
+ return "%s.%s(%r)" % (self.__class__.__module__,
+ self.__class__.__qualname__,
+ self._offset)
+ return "%s.%s(%r, %r)" % (self.__class__.__module__,
+ self.__class__.__qualname__,
+ self._offset, self._name)
+
+ def __str__(self):
+ return self.tzname(None)
+
+ def utcoffset(self, dt):
+ if isinstance(dt, datetime) or dt is None:
+ return self._offset
+ raise TypeError("utcoffset() argument must be a datetime instance"
+ " or None")
+
+ def tzname(self, dt):
+ if isinstance(dt, datetime) or dt is None:
+ if self._name is None:
+ return self._name_from_offset(self._offset)
+ return self._name
+ raise TypeError("tzname() argument must be a datetime instance"
+ " or None")
+
+ def dst(self, dt):
+ if isinstance(dt, datetime) or dt is None:
+ return None
+ raise TypeError("dst() argument must be a datetime instance"
+ " or None")
+
+ def fromutc(self, dt):
+ if isinstance(dt, datetime):
+ if dt.tzinfo is not self:
+ raise ValueError("fromutc: dt.tzinfo "
+ "is not self")
+ return dt + self._offset
+ raise TypeError("fromutc() argument must be a datetime instance"
+ " or None")
+
+ _maxoffset = timedelta(hours=24, microseconds=-1)
+ _minoffset = -_maxoffset
+
+ @staticmethod
+ def _name_from_offset(delta):
+ if not delta:
+ return 'UTC'
+ if delta < timedelta(0):
+ sign = '-'
+ delta = -delta
+ else:
+ sign = '+'
+ hours, rest = divmod(delta, timedelta(hours=1))
+ minutes, rest = divmod(rest, timedelta(minutes=1))
+ seconds = rest.seconds
+ microseconds = rest.microseconds
+ if microseconds:
+ return (f'UTC{sign}{hours:02d}:{minutes:02d}:{seconds:02d}'
+ f'.{microseconds:06d}')
+ if seconds:
+ return f'UTC{sign}{hours:02d}:{minutes:02d}:{seconds:02d}'
+ return f'UTC{sign}{hours:02d}:{minutes:02d}'
+
+timezone.utc = timezone._create(timedelta(0))
+# bpo-37642: These attributes are rounded to the nearest minute for backwards
+# compatibility, even though the constructor will accept a wider range of
+# values. This may change in the future.
+timezone.min = timezone._create(-timedelta(hours=23, minutes=59))
+timezone.max = timezone._create(timedelta(hours=23, minutes=59))
+_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
+
+# Some time zone algebra. For a datetime x, let
+# x.n = x stripped of its timezone -- its naive time.
+# x.o = x.utcoffset(), and assuming that doesn't raise an exception or
+# return None
+# x.d = x.dst(), and assuming that doesn't raise an exception or
+# return None
+# x.s = x's standard offset, x.o - x.d
+#
+# Now some derived rules, where k is a duration (timedelta).
+#
+# 1. x.o = x.s + x.d
+# This follows from the definition of x.s.
+#
+# 2. If x and y have the same tzinfo member, x.s = y.s.
+# This is actually a requirement, an assumption we need to make about
+# sane tzinfo classes.
+#
+# 3. The naive UTC time corresponding to x is x.n - x.o.
+# This is again a requirement for a sane tzinfo class.
+#
+# 4. (x+k).s = x.s
+# This follows from #2, and that datimetimetz+timedelta preserves tzinfo.
+#
+# 5. (x+k).n = x.n + k
+# Again follows from how arithmetic is defined.
+#
+# Now we can explain tz.fromutc(x). Let's assume it's an interesting case
+# (meaning that the various tzinfo methods exist, and don't blow up or return
+# None when called).
+#
+# The function wants to return a datetime y with timezone tz, equivalent to x.
+# x is already in UTC.
+#
+# By #3, we want
+#
+# y.n - y.o = x.n [1]
+#
+# The algorithm starts by attaching tz to x.n, and calling that y. So
+# x.n = y.n at the start. Then it wants to add a duration k to y, so that [1]
+# becomes true; in effect, we want to solve [2] for k:
+#
+# (y+k).n - (y+k).o = x.n [2]
+#
+# By #1, this is the same as
+#
+# (y+k).n - ((y+k).s + (y+k).d) = x.n [3]
+#
+# By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start.
+# Substituting that into [3],
+#
+# x.n + k - (y+k).s - (y+k).d = x.n; the x.n terms cancel, leaving
+# k - (y+k).s - (y+k).d = 0; rearranging,
+# k = (y+k).s - (y+k).d; by #4, (y+k).s == y.s, so
+# k = y.s - (y+k).d
+#
+# On the RHS, (y+k).d can't be computed directly, but y.s can be, and we
+# approximate k by ignoring the (y+k).d term at first. Note that k can't be
+# very large, since all offset-returning methods return a duration of magnitude
+# less than 24 hours. For that reason, if y is firmly in std time, (y+k).d must
+# be 0, so ignoring it has no consequence then.
+#
+# In any case, the new value is
+#
+# z = y + y.s [4]
+#
+# It's helpful to step back at look at [4] from a higher level: it's simply
+# mapping from UTC to tz's standard time.
+#
+# At this point, if
+#
+# z.n - z.o = x.n [5]
+#
+# we have an equivalent time, and are almost done. The insecurity here is
+# at the start of daylight time. Picture US Eastern for concreteness. The wall
+# time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good
+# sense then. The docs ask that an Eastern tzinfo class consider such a time to
+# be EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST
+# on the day DST starts. We want to return the 1:MM EST spelling because that's
+# the only spelling that makes sense on the local wall clock.
+#
+# In fact, if [5] holds at this point, we do have the standard-time spelling,
+# but that takes a bit of proof. We first prove a stronger result. What's the
+# difference between the LHS and RHS of [5]? Let
+#
+# diff = x.n - (z.n - z.o) [6]
+#
+# Now
+# z.n = by [4]
+# (y + y.s).n = by #5
+# y.n + y.s = since y.n = x.n
+# x.n + y.s = since z and y are have the same tzinfo member,
+# y.s = z.s by #2
+# x.n + z.s
+#
+# Plugging that back into [6] gives
+#
+# diff =
+# x.n - ((x.n + z.s) - z.o) = expanding
+# x.n - x.n - z.s + z.o = cancelling
+# - z.s + z.o = by #2
+# z.d
+#
+# So diff = z.d.
+#
+# If [5] is true now, diff = 0, so z.d = 0 too, and we have the standard-time
+# spelling we wanted in the endcase described above. We're done. Contrarily,
+# if z.d = 0, then we have a UTC equivalent, and are also done.
+#
+# If [5] is not true now, diff = z.d != 0, and z.d is the offset we need to
+# add to z (in effect, z is in tz's standard time, and we need to shift the
+# local clock into tz's daylight time).
+#
+# Let
+#
+# z' = z + z.d = z + diff [7]
+#
+# and we can again ask whether
+#
+# z'.n - z'.o = x.n [8]
+#
+# If so, we're done. If not, the tzinfo class is insane, according to the
+# assumptions we've made. This also requires a bit of proof. As before, let's
+# compute the difference between the LHS and RHS of [8] (and skipping some of
+# the justifications for the kinds of substitutions we've done several times
+# already):
+#
+# diff' = x.n - (z'.n - z'.o) = replacing z'.n via [7]
+# x.n - (z.n + diff - z'.o) = replacing diff via [6]
+# x.n - (z.n + x.n - (z.n - z.o) - z'.o) =
+# x.n - z.n - x.n + z.n - z.o + z'.o = cancel x.n
+# - z.n + z.n - z.o + z'.o = cancel z.n
+# - z.o + z'.o = #1 twice
+# -z.s - z.d + z'.s + z'.d = z and z' have same tzinfo
+# z'.d - z.d
+#
+# So z' is UTC-equivalent to x iff z'.d = z.d at this point. If they are equal,
+# we've found the UTC-equivalent so are done. In fact, we stop with [7] and
+# return z', not bothering to compute z'.d.
+#
+# How could z.d and z'd differ? z' = z + z.d [7], so merely moving z' by
+# a dst() offset, and starting *from* a time already in DST (we know z.d != 0),
+# would have to change the result dst() returns: we start in DST, and moving
+# a little further into it takes us out of DST.
+#
+# There isn't a sane case where this can happen. The closest it gets is at
+# the end of DST, where there's an hour in UTC with no spelling in a hybrid
+# tzinfo class. In US Eastern, that's 5:MM UTC = 0:MM EST = 1:MM EDT. During
+# that hour, on an Eastern clock 1:MM is taken as being in standard time (6:MM
+# UTC) because the docs insist on that, but 0:MM is taken as being in daylight
+# time (4:MM UTC). There is no local time mapping to 5:MM UTC. The local
+# clock jumps from 1:59 back to 1:00 again, and repeats the 1:MM hour in
+# standard time. Since that's what the local clock *does*, we want to map both
+# UTC hours 5:MM and 6:MM to 1:MM Eastern. The result is ambiguous
+# in local time, but so it goes -- it's the way the local clock works.
+#
+# When x = 5:MM UTC is the input to this algorithm, x.o=0, y.o=-5 and y.d=0,
+# so z=0:MM. z.d=60 (minutes) then, so [5] doesn't hold and we keep going.
+# z' = z + z.d = 1:MM then, and z'.d=0, and z'.d - z.d = -60 != 0 so [8]
+# (correctly) concludes that z' is not UTC-equivalent to x.
+#
+# Because we know z.d said z was in daylight time (else [5] would have held and
+# we would have stopped then), and we know z.d != z'.d (else [8] would have held
+# and we have stopped then), and there are only 2 possible values dst() can
+# return in Eastern, it follows that z'.d must be 0 (which it is in the example,
+# but the reasoning doesn't depend on the example -- it depends on there being
+# two possible dst() outcomes, one zero and the other non-zero). Therefore
+# z' must be in standard time, and is the spelling we want in this case.
+#
+# Note again that z' is not UTC-equivalent as far as the hybrid tzinfo class is
+# concerned (because it takes z' as being in standard time rather than the
+# daylight time we intend here), but returning it gives the real-life "local
+# clock repeats an hour" behavior when mapping the "unspellable" UTC hour into
+# tz.
+#
+# When the input is 6:MM, z=1:MM and z.d=0, and we stop at once, again with
+# the 1:MM standard time spelling we want.
+#
+# So how can this break? One of the assumptions must be violated. Two
+# possibilities:
+#
+# 1) [2] effectively says that y.s is invariant across all y belong to a given
+# time zone. This isn't true if, for political reasons or continental drift,
+# a region decides to change its base offset from UTC.
+#
+# 2) There may be versions of "double daylight" time where the tail end of
+# the analysis gives up a step too early. I haven't thought about that
+# enough to say.
+#
+# In any case, it's clear that the default fromutc() is strong enough to handle
+# "almost all" time zones: so long as the standard offset is invariant, it
+# doesn't matter if daylight time transition points change from year to year, or
+# if daylight time is skipped in some years; it doesn't matter how large or
+# small dst() may get within its bounds; and it doesn't even matter if some
+# perverse time zone returns a negative dst()). So a breaking case must be
+# pretty bizarre, and a tzinfo subclass can override fromutc() if it is.
+
+try:
+ from _datetime import *
+except ImportError:
+ pass
+else:
+ # Clean up unused names
+ del (_DAYNAMES, _DAYS_BEFORE_MONTH, _DAYS_IN_MONTH, _DI100Y, _DI400Y,
+ _DI4Y, _EPOCH, _MAXORDINAL, _MONTHNAMES, _build_struct_time,
+ _check_date_fields, _check_int_field, _check_time_fields,
+ _check_tzinfo_arg, _check_tzname, _check_utc_offset, _cmp, _cmperror,
+ _date_class, _days_before_month, _days_before_year, _days_in_month,
+ _format_time, _format_offset, _is_leap, _isoweek1monday, _math,
+ _ord2ymd, _time, _time_class, _tzinfo_class, _wrap_strftime, _ymd2ord,
+ _divide_and_round, _parse_isoformat_date, _parse_isoformat_time,
+ _parse_hh_mm_ss_ff)
+ # XXX Since import * above excludes names that start with _,
+ # docstring does not get overwritten. In the future, it may be
+ # appropriate to maintain a single module level docstring and
+ # remove the following line.
+ from _datetime import __doc__
diff --git a/dist/lib/decimal.py b/dist/lib/decimal.py
new file mode 100644
index 0000000..7746ea2
--- /dev/null
+++ b/dist/lib/decimal.py
@@ -0,0 +1,11 @@
+
+try:
+ from _decimal import *
+ from _decimal import __doc__
+ from _decimal import __version__
+ from _decimal import __libmpdec_version__
+except ImportError:
+ from _pydecimal import *
+ from _pydecimal import __doc__
+ from _pydecimal import __version__
+ from _pydecimal import __libmpdec_version__
diff --git a/dist/lib/difflib.py b/dist/lib/difflib.py
new file mode 100644
index 0000000..5d75643
--- /dev/null
+++ b/dist/lib/difflib.py
@@ -0,0 +1,2086 @@
+"""
+Module difflib -- helpers for computing deltas between objects.
+
+Function get_close_matches(word, possibilities, n=3, cutoff=0.6):
+ Use SequenceMatcher to return list of the best "good enough" matches.
+
+Function context_diff(a, b):
+ For two lists of strings, return a delta in context diff format.
+
+Function ndiff(a, b):
+ Return a delta: the difference between `a` and `b` (lists of strings).
+
+Function restore(delta, which):
+ Return one of the two sequences that generated an ndiff delta.
+
+Function unified_diff(a, b):
+ For two lists of strings, return a delta in unified diff format.
+
+Class SequenceMatcher:
+ A flexible class for comparing pairs of sequences of any type.
+
+Class Differ:
+ For producing human-readable deltas from sequences of lines of text.
+
+Class HtmlDiff:
+ For producing HTML side by side comparison with change highlights.
+"""
+
+__all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
+ 'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff',
+ 'unified_diff', 'diff_bytes', 'HtmlDiff', 'Match']
+
+from heapq import nlargest as _nlargest
+from collections import namedtuple as _namedtuple
+
+Match = _namedtuple('Match', 'a b size')
+
+def _calculate_ratio(matches, length):
+ if length:
+ return 2.0 * matches / length
+ return 1.0
+
+class SequenceMatcher:
+
+ """
+ SequenceMatcher is a flexible class for comparing pairs of sequences of
+ any type, so long as the sequence elements are hashable. The basic
+ algorithm predates, and is a little fancier than, an algorithm
+ published in the late 1980's by Ratcliff and Obershelp under the
+ hyperbolic name "gestalt pattern matching". The basic idea is to find
+ the longest contiguous matching subsequence that contains no "junk"
+ elements (R-O doesn't address junk). The same idea is then applied
+ recursively to the pieces of the sequences to the left and to the right
+ of the matching subsequence. This does not yield minimal edit
+ sequences, but does tend to yield matches that "look right" to people.
+
+ SequenceMatcher tries to compute a "human-friendly diff" between two
+ sequences. Unlike e.g. UNIX(tm) diff, the fundamental notion is the
+ longest *contiguous* & junk-free matching subsequence. That's what
+ catches peoples' eyes. The Windows(tm) windiff has another interesting
+ notion, pairing up elements that appear uniquely in each sequence.
+ That, and the method here, appear to yield more intuitive difference
+ reports than does diff. This method appears to be the least vulnerable
+ to synching up on blocks of "junk lines", though (like blank lines in
+ ordinary text files, or maybe "" lines in HTML files). That may be
+ because this is the only method of the 3 that has a *concept* of
+ "junk" .
+
+ Example, comparing two strings, and considering blanks to be "junk":
+
+ >>> s = SequenceMatcher(lambda x: x == " ",
+ ... "private Thread currentThread;",
+ ... "private volatile Thread currentThread;")
+ >>>
+
+ .ratio() returns a float in [0, 1], measuring the "similarity" of the
+ sequences. As a rule of thumb, a .ratio() value over 0.6 means the
+ sequences are close matches:
+
+ >>> print(round(s.ratio(), 3))
+ 0.866
+ >>>
+
+ If you're only interested in where the sequences match,
+ .get_matching_blocks() is handy:
+
+ >>> for block in s.get_matching_blocks():
+ ... print("a[%d] and b[%d] match for %d elements" % block)
+ a[0] and b[0] match for 8 elements
+ a[8] and b[17] match for 21 elements
+ a[29] and b[38] match for 0 elements
+
+ Note that the last tuple returned by .get_matching_blocks() is always a
+ dummy, (len(a), len(b), 0), and this is the only case in which the last
+ tuple element (number of elements matched) is 0.
+
+ If you want to know how to change the first sequence into the second,
+ use .get_opcodes():
+
+ >>> for opcode in s.get_opcodes():
+ ... print("%6s a[%d:%d] b[%d:%d]" % opcode)
+ equal a[0:8] b[0:8]
+ insert a[8:8] b[8:17]
+ equal a[8:29] b[17:38]
+
+ See the Differ class for a fancy human-friendly file differencer, which
+ uses SequenceMatcher both to compare sequences of lines, and to compare
+ sequences of characters within similar (near-matching) lines.
+
+ See also function get_close_matches() in this module, which shows how
+ simple code building on SequenceMatcher can be used to do useful work.
+
+ Timing: Basic R-O is cubic time worst case and quadratic time expected
+ case. SequenceMatcher is quadratic time for the worst case and has
+ expected-case behavior dependent in a complicated way on how many
+ elements the sequences have in common; best case time is linear.
+
+ Methods:
+
+ __init__(isjunk=None, a='', b='')
+ Construct a SequenceMatcher.
+
+ set_seqs(a, b)
+ Set the two sequences to be compared.
+
+ set_seq1(a)
+ Set the first sequence to be compared.
+
+ set_seq2(b)
+ Set the second sequence to be compared.
+
+ find_longest_match(alo, ahi, blo, bhi)
+ Find longest matching block in a[alo:ahi] and b[blo:bhi].
+
+ get_matching_blocks()
+ Return list of triples describing matching subsequences.
+
+ get_opcodes()
+ Return list of 5-tuples describing how to turn a into b.
+
+ ratio()
+ Return a measure of the sequences' similarity (float in [0,1]).
+
+ quick_ratio()
+ Return an upper bound on .ratio() relatively quickly.
+
+ real_quick_ratio()
+ Return an upper bound on ratio() very quickly.
+ """
+
+ def __init__(self, isjunk=None, a='', b='', autojunk=True):
+ """Construct a SequenceMatcher.
+
+ Optional arg isjunk is None (the default), or a one-argument
+ function that takes a sequence element and returns true iff the
+ element is junk. None is equivalent to passing "lambda x: 0", i.e.
+ no elements are considered to be junk. For example, pass
+ lambda x: x in " \\t"
+ if you're comparing lines as sequences of characters, and don't
+ want to synch up on blanks or hard tabs.
+
+ Optional arg a is the first of two sequences to be compared. By
+ default, an empty string. The elements of a must be hashable. See
+ also .set_seqs() and .set_seq1().
+
+ Optional arg b is the second of two sequences to be compared. By
+ default, an empty string. The elements of b must be hashable. See
+ also .set_seqs() and .set_seq2().
+
+ Optional arg autojunk should be set to False to disable the
+ "automatic junk heuristic" that treats popular elements as junk
+ (see module documentation for more information).
+ """
+
+ # Members:
+ # a
+ # first sequence
+ # b
+ # second sequence; differences are computed as "what do
+ # we need to do to 'a' to change it into 'b'?"
+ # b2j
+ # for x in b, b2j[x] is a list of the indices (into b)
+ # at which x appears; junk and popular elements do not appear
+ # fullbcount
+ # for x in b, fullbcount[x] == the number of times x
+ # appears in b; only materialized if really needed (used
+ # only for computing quick_ratio())
+ # matching_blocks
+ # a list of (i, j, k) triples, where a[i:i+k] == b[j:j+k];
+ # ascending & non-overlapping in i and in j; terminated by
+ # a dummy (len(a), len(b), 0) sentinel
+ # opcodes
+ # a list of (tag, i1, i2, j1, j2) tuples, where tag is
+ # one of
+ # 'replace' a[i1:i2] should be replaced by b[j1:j2]
+ # 'delete' a[i1:i2] should be deleted
+ # 'insert' b[j1:j2] should be inserted
+ # 'equal' a[i1:i2] == b[j1:j2]
+ # isjunk
+ # a user-supplied function taking a sequence element and
+ # returning true iff the element is "junk" -- this has
+ # subtle but helpful effects on the algorithm, which I'll
+ # get around to writing up someday <0.9 wink>.
+ # DON'T USE! Only __chain_b uses this. Use "in self.bjunk".
+ # bjunk
+ # the items in b for which isjunk is True.
+ # bpopular
+ # nonjunk items in b treated as junk by the heuristic (if used).
+
+ self.isjunk = isjunk
+ self.a = self.b = None
+ self.autojunk = autojunk
+ self.set_seqs(a, b)
+
+ def set_seqs(self, a, b):
+ """Set the two sequences to be compared.
+
+ >>> s = SequenceMatcher()
+ >>> s.set_seqs("abcd", "bcde")
+ >>> s.ratio()
+ 0.75
+ """
+
+ self.set_seq1(a)
+ self.set_seq2(b)
+
+ def set_seq1(self, a):
+ """Set the first sequence to be compared.
+
+ The second sequence to be compared is not changed.
+
+ >>> s = SequenceMatcher(None, "abcd", "bcde")
+ >>> s.ratio()
+ 0.75
+ >>> s.set_seq1("bcde")
+ >>> s.ratio()
+ 1.0
+ >>>
+
+ SequenceMatcher computes and caches detailed information about the
+ second sequence, so if you want to compare one sequence S against
+ many sequences, use .set_seq2(S) once and call .set_seq1(x)
+ repeatedly for each of the other sequences.
+
+ See also set_seqs() and set_seq2().
+ """
+
+ if a is self.a:
+ return
+ self.a = a
+ self.matching_blocks = self.opcodes = None
+
+ def set_seq2(self, b):
+ """Set the second sequence to be compared.
+
+ The first sequence to be compared is not changed.
+
+ >>> s = SequenceMatcher(None, "abcd", "bcde")
+ >>> s.ratio()
+ 0.75
+ >>> s.set_seq2("abcd")
+ >>> s.ratio()
+ 1.0
+ >>>
+
+ SequenceMatcher computes and caches detailed information about the
+ second sequence, so if you want to compare one sequence S against
+ many sequences, use .set_seq2(S) once and call .set_seq1(x)
+ repeatedly for each of the other sequences.
+
+ See also set_seqs() and set_seq1().
+ """
+
+ if b is self.b:
+ return
+ self.b = b
+ self.matching_blocks = self.opcodes = None
+ self.fullbcount = None
+ self.__chain_b()
+
+ # For each element x in b, set b2j[x] to a list of the indices in
+ # b where x appears; the indices are in increasing order; note that
+ # the number of times x appears in b is len(b2j[x]) ...
+ # when self.isjunk is defined, junk elements don't show up in this
+ # map at all, which stops the central find_longest_match method
+ # from starting any matching block at a junk element ...
+ # b2j also does not contain entries for "popular" elements, meaning
+ # elements that account for more than 1 + 1% of the total elements, and
+ # when the sequence is reasonably large (>= 200 elements); this can
+ # be viewed as an adaptive notion of semi-junk, and yields an enormous
+ # speedup when, e.g., comparing program files with hundreds of
+ # instances of "return NULL;" ...
+ # note that this is only called when b changes; so for cross-product
+ # kinds of matches, it's best to call set_seq2 once, then set_seq1
+ # repeatedly
+
+ def __chain_b(self):
+ # Because isjunk is a user-defined (not C) function, and we test
+ # for junk a LOT, it's important to minimize the number of calls.
+ # Before the tricks described here, __chain_b was by far the most
+ # time-consuming routine in the whole module! If anyone sees
+ # Jim Roskind, thank him again for profile.py -- I never would
+ # have guessed that.
+ # The first trick is to build b2j ignoring the possibility
+ # of junk. I.e., we don't call isjunk at all yet. Throwing
+ # out the junk later is much cheaper than building b2j "right"
+ # from the start.
+ b = self.b
+ self.b2j = b2j = {}
+
+ for i, elt in enumerate(b):
+ indices = b2j.setdefault(elt, [])
+ indices.append(i)
+
+ # Purge junk elements
+ self.bjunk = junk = set()
+ isjunk = self.isjunk
+ if isjunk:
+ for elt in b2j.keys():
+ if isjunk(elt):
+ junk.add(elt)
+ for elt in junk: # separate loop avoids separate list of keys
+ del b2j[elt]
+
+ # Purge popular elements that are not junk
+ self.bpopular = popular = set()
+ n = len(b)
+ if self.autojunk and n >= 200:
+ ntest = n // 100 + 1
+ for elt, idxs in b2j.items():
+ if len(idxs) > ntest:
+ popular.add(elt)
+ for elt in popular: # ditto; as fast for 1% deletion
+ del b2j[elt]
+
+ def find_longest_match(self, alo, ahi, blo, bhi):
+ """Find longest matching block in a[alo:ahi] and b[blo:bhi].
+
+ If isjunk is not defined:
+
+ Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
+ alo <= i <= i+k <= ahi
+ blo <= j <= j+k <= bhi
+ and for all (i',j',k') meeting those conditions,
+ k >= k'
+ i <= i'
+ and if i == i', j <= j'
+
+ In other words, of all maximal matching blocks, return one that
+ starts earliest in a, and of all those maximal matching blocks that
+ start earliest in a, return the one that starts earliest in b.
+
+ >>> s = SequenceMatcher(None, " abcd", "abcd abcd")
+ >>> s.find_longest_match(0, 5, 0, 9)
+ Match(a=0, b=4, size=5)
+
+ If isjunk is defined, first the longest matching block is
+ determined as above, but with the additional restriction that no
+ junk element appears in the block. Then that block is extended as
+ far as possible by matching (only) junk elements on both sides. So
+ the resulting block never matches on junk except as identical junk
+ happens to be adjacent to an "interesting" match.
+
+ Here's the same example as before, but considering blanks to be
+ junk. That prevents " abcd" from matching the " abcd" at the tail
+ end of the second sequence directly. Instead only the "abcd" can
+ match, and matches the leftmost "abcd" in the second sequence:
+
+ >>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
+ >>> s.find_longest_match(0, 5, 0, 9)
+ Match(a=1, b=0, size=4)
+
+ If no blocks match, return (alo, blo, 0).
+
+ >>> s = SequenceMatcher(None, "ab", "c")
+ >>> s.find_longest_match(0, 2, 0, 1)
+ Match(a=0, b=0, size=0)
+ """
+
+ # CAUTION: stripping common prefix or suffix would be incorrect.
+ # E.g.,
+ # ab
+ # acab
+ # Longest matching block is "ab", but if common prefix is
+ # stripped, it's "a" (tied with "b"). UNIX(tm) diff does so
+ # strip, so ends up claiming that ab is changed to acab by
+ # inserting "ca" in the middle. That's minimal but unintuitive:
+ # "it's obvious" that someone inserted "ac" at the front.
+ # Windiff ends up at the same place as diff, but by pairing up
+ # the unique 'b's and then matching the first two 'a's.
+
+ a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.bjunk.__contains__
+ besti, bestj, bestsize = alo, blo, 0
+ # find longest junk-free match
+ # during an iteration of the loop, j2len[j] = length of longest
+ # junk-free match ending with a[i-1] and b[j]
+ j2len = {}
+ nothing = []
+ for i in range(alo, ahi):
+ # look at all instances of a[i] in b; note that because
+ # b2j has no junk keys, the loop is skipped if a[i] is junk
+ j2lenget = j2len.get
+ newj2len = {}
+ for j in b2j.get(a[i], nothing):
+ # a[i] matches b[j]
+ if j < blo:
+ continue
+ if j >= bhi:
+ break
+ k = newj2len[j] = j2lenget(j-1, 0) + 1
+ if k > bestsize:
+ besti, bestj, bestsize = i-k+1, j-k+1, k
+ j2len = newj2len
+
+ # Extend the best by non-junk elements on each end. In particular,
+ # "popular" non-junk elements aren't in b2j, which greatly speeds
+ # the inner loop above, but also means "the best" match so far
+ # doesn't contain any junk *or* popular non-junk elements.
+ while besti > alo and bestj > blo and \
+ not isbjunk(b[bestj-1]) and \
+ a[besti-1] == b[bestj-1]:
+ besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
+ while besti+bestsize < ahi and bestj+bestsize < bhi and \
+ not isbjunk(b[bestj+bestsize]) and \
+ a[besti+bestsize] == b[bestj+bestsize]:
+ bestsize += 1
+
+ # Now that we have a wholly interesting match (albeit possibly
+ # empty!), we may as well suck up the matching junk on each
+ # side of it too. Can't think of a good reason not to, and it
+ # saves post-processing the (possibly considerable) expense of
+ # figuring out what to do with it. In the case of an empty
+ # interesting match, this is clearly the right thing to do,
+ # because no other kind of match is possible in the regions.
+ while besti > alo and bestj > blo and \
+ isbjunk(b[bestj-1]) and \
+ a[besti-1] == b[bestj-1]:
+ besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
+ while besti+bestsize < ahi and bestj+bestsize < bhi and \
+ isbjunk(b[bestj+bestsize]) and \
+ a[besti+bestsize] == b[bestj+bestsize]:
+ bestsize = bestsize + 1
+
+ return Match(besti, bestj, bestsize)
+
+ def get_matching_blocks(self):
+ """Return list of triples describing matching subsequences.
+
+ Each triple is of the form (i, j, n), and means that
+ a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in
+ i and in j. New in Python 2.5, it's also guaranteed that if
+ (i, j, n) and (i', j', n') are adjacent triples in the list, and
+ the second is not the last triple in the list, then i+n != i' or
+ j+n != j'. IOW, adjacent triples never describe adjacent equal
+ blocks.
+
+ The last triple is a dummy, (len(a), len(b), 0), and is the only
+ triple with n==0.
+
+ >>> s = SequenceMatcher(None, "abxcd", "abcd")
+ >>> list(s.get_matching_blocks())
+ [Match(a=0, b=0, size=2), Match(a=3, b=2, size=2), Match(a=5, b=4, size=0)]
+ """
+
+ if self.matching_blocks is not None:
+ return self.matching_blocks
+ la, lb = len(self.a), len(self.b)
+
+ # This is most naturally expressed as a recursive algorithm, but
+ # at least one user bumped into extreme use cases that exceeded
+ # the recursion limit on their box. So, now we maintain a list
+ # ('queue`) of blocks we still need to look at, and append partial
+ # results to `matching_blocks` in a loop; the matches are sorted
+ # at the end.
+ queue = [(0, la, 0, lb)]
+ matching_blocks = []
+ while queue:
+ alo, ahi, blo, bhi = queue.pop()
+ i, j, k = x = self.find_longest_match(alo, ahi, blo, bhi)
+ # a[alo:i] vs b[blo:j] unknown
+ # a[i:i+k] same as b[j:j+k]
+ # a[i+k:ahi] vs b[j+k:bhi] unknown
+ if k: # if k is 0, there was no matching block
+ matching_blocks.append(x)
+ if alo < i and blo < j:
+ queue.append((alo, i, blo, j))
+ if i+k < ahi and j+k < bhi:
+ queue.append((i+k, ahi, j+k, bhi))
+ matching_blocks.sort()
+
+ # It's possible that we have adjacent equal blocks in the
+ # matching_blocks list now. Starting with 2.5, this code was added
+ # to collapse them.
+ i1 = j1 = k1 = 0
+ non_adjacent = []
+ for i2, j2, k2 in matching_blocks:
+ # Is this block adjacent to i1, j1, k1?
+ if i1 + k1 == i2 and j1 + k1 == j2:
+ # Yes, so collapse them -- this just increases the length of
+ # the first block by the length of the second, and the first
+ # block so lengthened remains the block to compare against.
+ k1 += k2
+ else:
+ # Not adjacent. Remember the first block (k1==0 means it's
+ # the dummy we started with), and make the second block the
+ # new block to compare against.
+ if k1:
+ non_adjacent.append((i1, j1, k1))
+ i1, j1, k1 = i2, j2, k2
+ if k1:
+ non_adjacent.append((i1, j1, k1))
+
+ non_adjacent.append( (la, lb, 0) )
+ self.matching_blocks = list(map(Match._make, non_adjacent))
+ return self.matching_blocks
+
+ def get_opcodes(self):
+ """Return list of 5-tuples describing how to turn a into b.
+
+ Each tuple is of the form (tag, i1, i2, j1, j2). The first tuple
+ has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the
+ tuple preceding it, and likewise for j1 == the previous j2.
+
+ The tags are strings, with these meanings:
+
+ 'replace': a[i1:i2] should be replaced by b[j1:j2]
+ 'delete': a[i1:i2] should be deleted.
+ Note that j1==j2 in this case.
+ 'insert': b[j1:j2] should be inserted at a[i1:i1].
+ Note that i1==i2 in this case.
+ 'equal': a[i1:i2] == b[j1:j2]
+
+ >>> a = "qabxcd"
+ >>> b = "abycdf"
+ >>> s = SequenceMatcher(None, a, b)
+ >>> for tag, i1, i2, j1, j2 in s.get_opcodes():
+ ... print(("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %
+ ... (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2])))
+ delete a[0:1] (q) b[0:0] ()
+ equal a[1:3] (ab) b[0:2] (ab)
+ replace a[3:4] (x) b[2:3] (y)
+ equal a[4:6] (cd) b[3:5] (cd)
+ insert a[6:6] () b[5:6] (f)
+ """
+
+ if self.opcodes is not None:
+ return self.opcodes
+ i = j = 0
+ self.opcodes = answer = []
+ for ai, bj, size in self.get_matching_blocks():
+ # invariant: we've pumped out correct diffs to change
+ # a[:i] into b[:j], and the next matching block is
+ # a[ai:ai+size] == b[bj:bj+size]. So we need to pump
+ # out a diff to change a[i:ai] into b[j:bj], pump out
+ # the matching block, and move (i,j) beyond the match
+ tag = ''
+ if i < ai and j < bj:
+ tag = 'replace'
+ elif i < ai:
+ tag = 'delete'
+ elif j < bj:
+ tag = 'insert'
+ if tag:
+ answer.append( (tag, i, ai, j, bj) )
+ i, j = ai+size, bj+size
+ # the list of matching blocks is terminated by a
+ # sentinel with size 0
+ if size:
+ answer.append( ('equal', ai, i, bj, j) )
+ return answer
+
+ def get_grouped_opcodes(self, n=3):
+ """ Isolate change clusters by eliminating ranges with no changes.
+
+ Return a generator of groups with up to n lines of context.
+ Each group is in the same format as returned by get_opcodes().
+
+ >>> from pprint import pprint
+ >>> a = list(map(str, range(1,40)))
+ >>> b = a[:]
+ >>> b[8:8] = ['i'] # Make an insertion
+ >>> b[20] += 'x' # Make a replacement
+ >>> b[23:28] = [] # Make a deletion
+ >>> b[30] += 'y' # Make another replacement
+ >>> pprint(list(SequenceMatcher(None,a,b).get_grouped_opcodes()))
+ [[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)],
+ [('equal', 16, 19, 17, 20),
+ ('replace', 19, 20, 20, 21),
+ ('equal', 20, 22, 21, 23),
+ ('delete', 22, 27, 23, 23),
+ ('equal', 27, 30, 23, 26)],
+ [('equal', 31, 34, 27, 30),
+ ('replace', 34, 35, 30, 31),
+ ('equal', 35, 38, 31, 34)]]
+ """
+
+ codes = self.get_opcodes()
+ if not codes:
+ codes = [("equal", 0, 1, 0, 1)]
+ # Fixup leading and trailing groups if they show no changes.
+ if codes[0][0] == 'equal':
+ tag, i1, i2, j1, j2 = codes[0]
+ codes[0] = tag, max(i1, i2-n), i2, max(j1, j2-n), j2
+ if codes[-1][0] == 'equal':
+ tag, i1, i2, j1, j2 = codes[-1]
+ codes[-1] = tag, i1, min(i2, i1+n), j1, min(j2, j1+n)
+
+ nn = n + n
+ group = []
+ for tag, i1, i2, j1, j2 in codes:
+ # End the current group and start a new one whenever
+ # there is a large range with no changes.
+ if tag == 'equal' and i2-i1 > nn:
+ group.append((tag, i1, min(i2, i1+n), j1, min(j2, j1+n)))
+ yield group
+ group = []
+ i1, j1 = max(i1, i2-n), max(j1, j2-n)
+ group.append((tag, i1, i2, j1 ,j2))
+ if group and not (len(group)==1 and group[0][0] == 'equal'):
+ yield group
+
+ def ratio(self):
+ """Return a measure of the sequences' similarity (float in [0,1]).
+
+ Where T is the total number of elements in both sequences, and
+ M is the number of matches, this is 2.0*M / T.
+ Note that this is 1 if the sequences are identical, and 0 if
+ they have nothing in common.
+
+ .ratio() is expensive to compute if you haven't already computed
+ .get_matching_blocks() or .get_opcodes(), in which case you may
+ want to try .quick_ratio() or .real_quick_ratio() first to get an
+ upper bound.
+
+ >>> s = SequenceMatcher(None, "abcd", "bcde")
+ >>> s.ratio()
+ 0.75
+ >>> s.quick_ratio()
+ 0.75
+ >>> s.real_quick_ratio()
+ 1.0
+ """
+
+ matches = sum(triple[-1] for triple in self.get_matching_blocks())
+ return _calculate_ratio(matches, len(self.a) + len(self.b))
+
+ def quick_ratio(self):
+ """Return an upper bound on ratio() relatively quickly.
+
+ This isn't defined beyond that it is an upper bound on .ratio(), and
+ is faster to compute.
+ """
+
+ # viewing a and b as multisets, set matches to the cardinality
+ # of their intersection; this counts the number of matches
+ # without regard to order, so is clearly an upper bound
+ if self.fullbcount is None:
+ self.fullbcount = fullbcount = {}
+ for elt in self.b:
+ fullbcount[elt] = fullbcount.get(elt, 0) + 1
+ fullbcount = self.fullbcount
+ # avail[x] is the number of times x appears in 'b' less the
+ # number of times we've seen it in 'a' so far ... kinda
+ avail = {}
+ availhas, matches = avail.__contains__, 0
+ for elt in self.a:
+ if availhas(elt):
+ numb = avail[elt]
+ else:
+ numb = fullbcount.get(elt, 0)
+ avail[elt] = numb - 1
+ if numb > 0:
+ matches = matches + 1
+ return _calculate_ratio(matches, len(self.a) + len(self.b))
+
+ def real_quick_ratio(self):
+ """Return an upper bound on ratio() very quickly.
+
+ This isn't defined beyond that it is an upper bound on .ratio(), and
+ is faster to compute than either .ratio() or .quick_ratio().
+ """
+
+ la, lb = len(self.a), len(self.b)
+ # can't have more matches than the number of elements in the
+ # shorter sequence
+ return _calculate_ratio(min(la, lb), la + lb)
+
+def get_close_matches(word, possibilities, n=3, cutoff=0.6):
+ """Use SequenceMatcher to return list of the best "good enough" matches.
+
+ word is a sequence for which close matches are desired (typically a
+ string).
+
+ possibilities is a list of sequences against which to match word
+ (typically a list of strings).
+
+ Optional arg n (default 3) is the maximum number of close matches to
+ return. n must be > 0.
+
+ Optional arg cutoff (default 0.6) is a float in [0, 1]. Possibilities
+ that don't score at least that similar to word are ignored.
+
+ The best (no more than n) matches among the possibilities are returned
+ in a list, sorted by similarity score, most similar first.
+
+ >>> get_close_matches("appel", ["ape", "apple", "peach", "puppy"])
+ ['apple', 'ape']
+ >>> import keyword as _keyword
+ >>> get_close_matches("wheel", _keyword.kwlist)
+ ['while']
+ >>> get_close_matches("Apple", _keyword.kwlist)
+ []
+ >>> get_close_matches("accept", _keyword.kwlist)
+ ['except']
+ """
+
+ if not n > 0:
+ raise ValueError("n must be > 0: %r" % (n,))
+ if not 0.0 <= cutoff <= 1.0:
+ raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff,))
+ result = []
+ s = SequenceMatcher()
+ s.set_seq2(word)
+ for x in possibilities:
+ s.set_seq1(x)
+ if s.real_quick_ratio() >= cutoff and \
+ s.quick_ratio() >= cutoff and \
+ s.ratio() >= cutoff:
+ result.append((s.ratio(), x))
+
+ # Move the best scorers to head of list
+ result = _nlargest(n, result)
+ # Strip scores for the best n matches
+ return [x for score, x in result]
+
+
+def _keep_original_ws(s, tag_s):
+ """Replace whitespace with the original whitespace characters in `s`"""
+ return ''.join(
+ c if tag_c == " " and c.isspace() else tag_c
+ for c, tag_c in zip(s, tag_s)
+ )
+
+
+
+class Differ:
+ r"""
+ Differ is a class for comparing sequences of lines of text, and
+ producing human-readable differences or deltas. Differ uses
+ SequenceMatcher both to compare sequences of lines, and to compare
+ sequences of characters within similar (near-matching) lines.
+
+ Each line of a Differ delta begins with a two-letter code:
+
+ '- ' line unique to sequence 1
+ '+ ' line unique to sequence 2
+ ' ' line common to both sequences
+ '? ' line not present in either input sequence
+
+ Lines beginning with '? ' attempt to guide the eye to intraline
+ differences, and were not present in either input sequence. These lines
+ can be confusing if the sequences contain tab characters.
+
+ Note that Differ makes no claim to produce a *minimal* diff. To the
+ contrary, minimal diffs are often counter-intuitive, because they synch
+ up anywhere possible, sometimes accidental matches 100 pages apart.
+ Restricting synch points to contiguous matches preserves some notion of
+ locality, at the occasional cost of producing a longer diff.
+
+ Example: Comparing two texts.
+
+ First we set up the texts, sequences of individual single-line strings
+ ending with newlines (such sequences can also be obtained from the
+ `readlines()` method of file-like objects):
+
+ >>> text1 = ''' 1. Beautiful is better than ugly.
+ ... 2. Explicit is better than implicit.
+ ... 3. Simple is better than complex.
+ ... 4. Complex is better than complicated.
+ ... '''.splitlines(keepends=True)
+ >>> len(text1)
+ 4
+ >>> text1[0][-1]
+ '\n'
+ >>> text2 = ''' 1. Beautiful is better than ugly.
+ ... 3. Simple is better than complex.
+ ... 4. Complicated is better than complex.
+ ... 5. Flat is better than nested.
+ ... '''.splitlines(keepends=True)
+
+ Next we instantiate a Differ object:
+
+ >>> d = Differ()
+
+ Note that when instantiating a Differ object we may pass functions to
+ filter out line and character 'junk'. See Differ.__init__ for details.
+
+ Finally, we compare the two:
+
+ >>> result = list(d.compare(text1, text2))
+
+ 'result' is a list of strings, so let's pretty-print it:
+
+ >>> from pprint import pprint as _pprint
+ >>> _pprint(result)
+ [' 1. Beautiful is better than ugly.\n',
+ '- 2. Explicit is better than implicit.\n',
+ '- 3. Simple is better than complex.\n',
+ '+ 3. Simple is better than complex.\n',
+ '? ++\n',
+ '- 4. Complex is better than complicated.\n',
+ '? ^ ---- ^\n',
+ '+ 4. Complicated is better than complex.\n',
+ '? ++++ ^ ^\n',
+ '+ 5. Flat is better than nested.\n']
+
+ As a single multi-line string it looks like this:
+
+ >>> print(''.join(result), end="")
+ 1. Beautiful is better than ugly.
+ - 2. Explicit is better than implicit.
+ - 3. Simple is better than complex.
+ + 3. Simple is better than complex.
+ ? ++
+ - 4. Complex is better than complicated.
+ ? ^ ---- ^
+ + 4. Complicated is better than complex.
+ ? ++++ ^ ^
+ + 5. Flat is better than nested.
+
+ Methods:
+
+ __init__(linejunk=None, charjunk=None)
+ Construct a text differencer, with optional filters.
+
+ compare(a, b)
+ Compare two sequences of lines; generate the resulting delta.
+ """
+
+ def __init__(self, linejunk=None, charjunk=None):
+ """
+ Construct a text differencer, with optional filters.
+
+ The two optional keyword parameters are for filter functions:
+
+ - `linejunk`: A function that should accept a single string argument,
+ and return true iff the string is junk. The module-level function
+ `IS_LINE_JUNK` may be used to filter out lines without visible
+ characters, except for at most one splat ('#'). It is recommended
+ to leave linejunk None; the underlying SequenceMatcher class has
+ an adaptive notion of "noise" lines that's better than any static
+ definition the author has ever been able to craft.
+
+ - `charjunk`: A function that should accept a string of length 1. The
+ module-level function `IS_CHARACTER_JUNK` may be used to filter out
+ whitespace characters (a blank or tab; **note**: bad idea to include
+ newline in this!). Use of IS_CHARACTER_JUNK is recommended.
+ """
+
+ self.linejunk = linejunk
+ self.charjunk = charjunk
+
+ def compare(self, a, b):
+ r"""
+ Compare two sequences of lines; generate the resulting delta.
+
+ Each sequence must contain individual single-line strings ending with
+ newlines. Such sequences can be obtained from the `readlines()` method
+ of file-like objects. The delta generated also consists of newline-
+ terminated strings, ready to be printed as-is via the writeline()
+ method of a file-like object.
+
+ Example:
+
+ >>> print(''.join(Differ().compare('one\ntwo\nthree\n'.splitlines(True),
+ ... 'ore\ntree\nemu\n'.splitlines(True))),
+ ... end="")
+ - one
+ ? ^
+ + ore
+ ? ^
+ - two
+ - three
+ ? -
+ + tree
+ + emu
+ """
+
+ cruncher = SequenceMatcher(self.linejunk, a, b)
+ for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
+ if tag == 'replace':
+ g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
+ elif tag == 'delete':
+ g = self._dump('-', a, alo, ahi)
+ elif tag == 'insert':
+ g = self._dump('+', b, blo, bhi)
+ elif tag == 'equal':
+ g = self._dump(' ', a, alo, ahi)
+ else:
+ raise ValueError('unknown tag %r' % (tag,))
+
+ yield from g
+
+ def _dump(self, tag, x, lo, hi):
+ """Generate comparison results for a same-tagged range."""
+ for i in range(lo, hi):
+ yield '%s %s' % (tag, x[i])
+
+ def _plain_replace(self, a, alo, ahi, b, blo, bhi):
+ assert alo < ahi and blo < bhi
+ # dump the shorter block first -- reduces the burden on short-term
+ # memory if the blocks are of very different sizes
+ if bhi - blo < ahi - alo:
+ first = self._dump('+', b, blo, bhi)
+ second = self._dump('-', a, alo, ahi)
+ else:
+ first = self._dump('-', a, alo, ahi)
+ second = self._dump('+', b, blo, bhi)
+
+ for g in first, second:
+ yield from g
+
+ def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
+ r"""
+ When replacing one block of lines with another, search the blocks
+ for *similar* lines; the best-matching pair (if any) is used as a
+ synch point, and intraline difference marking is done on the
+ similar pair. Lots of work, but often worth it.
+
+ Example:
+
+ >>> d = Differ()
+ >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
+ ... ['abcdefGhijkl\n'], 0, 1)
+ >>> print(''.join(results), end="")
+ - abcDefghiJkl
+ ? ^ ^ ^
+ + abcdefGhijkl
+ ? ^ ^ ^
+ """
+
+ # don't synch up unless the lines have a similarity score of at
+ # least cutoff; best_ratio tracks the best score seen so far
+ best_ratio, cutoff = 0.74, 0.75
+ cruncher = SequenceMatcher(self.charjunk)
+ eqi, eqj = None, None # 1st indices of equal lines (if any)
+
+ # search for the pair that matches best without being identical
+ # (identical lines must be junk lines, & we don't want to synch up
+ # on junk -- unless we have to)
+ for j in range(blo, bhi):
+ bj = b[j]
+ cruncher.set_seq2(bj)
+ for i in range(alo, ahi):
+ ai = a[i]
+ if ai == bj:
+ if eqi is None:
+ eqi, eqj = i, j
+ continue
+ cruncher.set_seq1(ai)
+ # computing similarity is expensive, so use the quick
+ # upper bounds first -- have seen this speed up messy
+ # compares by a factor of 3.
+ # note that ratio() is only expensive to compute the first
+ # time it's called on a sequence pair; the expensive part
+ # of the computation is cached by cruncher
+ if cruncher.real_quick_ratio() > best_ratio and \
+ cruncher.quick_ratio() > best_ratio and \
+ cruncher.ratio() > best_ratio:
+ best_ratio, best_i, best_j = cruncher.ratio(), i, j
+ if best_ratio < cutoff:
+ # no non-identical "pretty close" pair
+ if eqi is None:
+ # no identical pair either -- treat it as a straight replace
+ yield from self._plain_replace(a, alo, ahi, b, blo, bhi)
+ return
+ # no close pair, but an identical pair -- synch up on that
+ best_i, best_j, best_ratio = eqi, eqj, 1.0
+ else:
+ # there's a close pair, so forget the identical pair (if any)
+ eqi = None
+
+ # a[best_i] very similar to b[best_j]; eqi is None iff they're not
+ # identical
+
+ # pump out diffs from before the synch point
+ yield from self._fancy_helper(a, alo, best_i, b, blo, best_j)
+
+ # do intraline marking on the synch pair
+ aelt, belt = a[best_i], b[best_j]
+ if eqi is None:
+ # pump out a '-', '?', '+', '?' quad for the synched lines
+ atags = btags = ""
+ cruncher.set_seqs(aelt, belt)
+ for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
+ la, lb = ai2 - ai1, bj2 - bj1
+ if tag == 'replace':
+ atags += '^' * la
+ btags += '^' * lb
+ elif tag == 'delete':
+ atags += '-' * la
+ elif tag == 'insert':
+ btags += '+' * lb
+ elif tag == 'equal':
+ atags += ' ' * la
+ btags += ' ' * lb
+ else:
+ raise ValueError('unknown tag %r' % (tag,))
+ yield from self._qformat(aelt, belt, atags, btags)
+ else:
+ # the synch pair is identical
+ yield ' ' + aelt
+
+ # pump out diffs from after the synch point
+ yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
+
+ def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
+ g = []
+ if alo < ahi:
+ if blo < bhi:
+ g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
+ else:
+ g = self._dump('-', a, alo, ahi)
+ elif blo < bhi:
+ g = self._dump('+', b, blo, bhi)
+
+ yield from g
+
+ def _qformat(self, aline, bline, atags, btags):
+ r"""
+ Format "?" output and deal with tabs.
+
+ Example:
+
+ >>> d = Differ()
+ >>> results = d._qformat('\tabcDefghiJkl\n', '\tabcdefGhijkl\n',
+ ... ' ^ ^ ^ ', ' ^ ^ ^ ')
+ >>> for line in results: print(repr(line))
+ ...
+ '- \tabcDefghiJkl\n'
+ '? \t ^ ^ ^\n'
+ '+ \tabcdefGhijkl\n'
+ '? \t ^ ^ ^\n'
+ """
+ atags = _keep_original_ws(aline, atags).rstrip()
+ btags = _keep_original_ws(bline, btags).rstrip()
+
+ yield "- " + aline
+ if atags:
+ yield f"? {atags}\n"
+
+ yield "+ " + bline
+ if btags:
+ yield f"? {btags}\n"
+
+# With respect to junk, an earlier version of ndiff simply refused to
+# *start* a match with a junk element. The result was cases like this:
+# before: private Thread currentThread;
+# after: private volatile Thread currentThread;
+# If you consider whitespace to be junk, the longest contiguous match
+# not starting with junk is "e Thread currentThread". So ndiff reported
+# that "e volatil" was inserted between the 't' and the 'e' in "private".
+# While an accurate view, to people that's absurd. The current version
+# looks for matching blocks that are entirely junk-free, then extends the
+# longest one of those as far as possible but only with matching junk.
+# So now "currentThread" is matched, then extended to suck up the
+# preceding blank; then "private" is matched, and extended to suck up the
+# following blank; then "Thread" is matched; and finally ndiff reports
+# that "volatile " was inserted before "Thread". The only quibble
+# remaining is that perhaps it was really the case that " volatile"
+# was inserted after "private". I can live with that .
+
+import re
+
+def IS_LINE_JUNK(line, pat=re.compile(r"\s*(?:#\s*)?$").match):
+ r"""
+ Return True for ignorable line: iff `line` is blank or contains a single '#'.
+
+ Examples:
+
+ >>> IS_LINE_JUNK('\n')
+ True
+ >>> IS_LINE_JUNK(' # \n')
+ True
+ >>> IS_LINE_JUNK('hello\n')
+ False
+ """
+
+ return pat(line) is not None
+
+def IS_CHARACTER_JUNK(ch, ws=" \t"):
+ r"""
+ Return True for ignorable character: iff `ch` is a space or tab.
+
+ Examples:
+
+ >>> IS_CHARACTER_JUNK(' ')
+ True
+ >>> IS_CHARACTER_JUNK('\t')
+ True
+ >>> IS_CHARACTER_JUNK('\n')
+ False
+ >>> IS_CHARACTER_JUNK('x')
+ False
+ """
+
+ return ch in ws
+
+
+########################################################################
+### Unified Diff
+########################################################################
+
+def _format_range_unified(start, stop):
+ 'Convert range to the "ed" format'
+ # Per the diff spec at http://www.unix.org/single_unix_specification/
+ beginning = start + 1 # lines start numbering with one
+ length = stop - start
+ if length == 1:
+ return '{}'.format(beginning)
+ if not length:
+ beginning -= 1 # empty ranges begin at line just before the range
+ return '{},{}'.format(beginning, length)
+
+def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
+ tofiledate='', n=3, lineterm='\n'):
+ r"""
+ Compare two sequences of lines; generate the delta as a unified diff.
+
+ Unified diffs are a compact way of showing line changes and a few
+ lines of context. The number of context lines is set by 'n' which
+ defaults to three.
+
+ By default, the diff control lines (those with ---, +++, or @@) are
+ created with a trailing newline. This is helpful so that inputs
+ created from file.readlines() result in diffs that are suitable for
+ file.writelines() since both the inputs and outputs have trailing
+ newlines.
+
+ For inputs that do not have trailing newlines, set the lineterm
+ argument to "" so that the output will be uniformly newline free.
+
+ The unidiff format normally has a header for filenames and modification
+ times. Any or all of these may be specified using strings for
+ 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
+ The modification times are normally expressed in the ISO 8601 format.
+
+ Example:
+
+ >>> for line in unified_diff('one two three four'.split(),
+ ... 'zero one tree four'.split(), 'Original', 'Current',
+ ... '2005-01-26 23:30:50', '2010-04-02 10:20:52',
+ ... lineterm=''):
+ ... print(line) # doctest: +NORMALIZE_WHITESPACE
+ --- Original 2005-01-26 23:30:50
+ +++ Current 2010-04-02 10:20:52
+ @@ -1,4 +1,4 @@
+ +zero
+ one
+ -two
+ -three
+ +tree
+ four
+ """
+
+ _check_types(a, b, fromfile, tofile, fromfiledate, tofiledate, lineterm)
+ started = False
+ for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
+ if not started:
+ started = True
+ fromdate = '\t{}'.format(fromfiledate) if fromfiledate else ''
+ todate = '\t{}'.format(tofiledate) if tofiledate else ''
+ yield '--- {}{}{}'.format(fromfile, fromdate, lineterm)
+ yield '+++ {}{}{}'.format(tofile, todate, lineterm)
+
+ first, last = group[0], group[-1]
+ file1_range = _format_range_unified(first[1], last[2])
+ file2_range = _format_range_unified(first[3], last[4])
+ yield '@@ -{} +{} @@{}'.format(file1_range, file2_range, lineterm)
+
+ for tag, i1, i2, j1, j2 in group:
+ if tag == 'equal':
+ for line in a[i1:i2]:
+ yield ' ' + line
+ continue
+ if tag in {'replace', 'delete'}:
+ for line in a[i1:i2]:
+ yield '-' + line
+ if tag in {'replace', 'insert'}:
+ for line in b[j1:j2]:
+ yield '+' + line
+
+
+########################################################################
+### Context Diff
+########################################################################
+
+def _format_range_context(start, stop):
+ 'Convert range to the "ed" format'
+ # Per the diff spec at http://www.unix.org/single_unix_specification/
+ beginning = start + 1 # lines start numbering with one
+ length = stop - start
+ if not length:
+ beginning -= 1 # empty ranges begin at line just before the range
+ if length <= 1:
+ return '{}'.format(beginning)
+ return '{},{}'.format(beginning, beginning + length - 1)
+
+# See http://www.unix.org/single_unix_specification/
+def context_diff(a, b, fromfile='', tofile='',
+ fromfiledate='', tofiledate='', n=3, lineterm='\n'):
+ r"""
+ Compare two sequences of lines; generate the delta as a context diff.
+
+ Context diffs are a compact way of showing line changes and a few
+ lines of context. The number of context lines is set by 'n' which
+ defaults to three.
+
+ By default, the diff control lines (those with *** or ---) are
+ created with a trailing newline. This is helpful so that inputs
+ created from file.readlines() result in diffs that are suitable for
+ file.writelines() since both the inputs and outputs have trailing
+ newlines.
+
+ For inputs that do not have trailing newlines, set the lineterm
+ argument to "" so that the output will be uniformly newline free.
+
+ The context diff format normally has a header for filenames and
+ modification times. Any or all of these may be specified using
+ strings for 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
+ The modification times are normally expressed in the ISO 8601 format.
+ If not specified, the strings default to blanks.
+
+ Example:
+
+ >>> print(''.join(context_diff('one\ntwo\nthree\nfour\n'.splitlines(True),
+ ... 'zero\none\ntree\nfour\n'.splitlines(True), 'Original', 'Current')),
+ ... end="")
+ *** Original
+ --- Current
+ ***************
+ *** 1,4 ****
+ one
+ ! two
+ ! three
+ four
+ --- 1,4 ----
+ + zero
+ one
+ ! tree
+ four
+ """
+
+ _check_types(a, b, fromfile, tofile, fromfiledate, tofiledate, lineterm)
+ prefix = dict(insert='+ ', delete='- ', replace='! ', equal=' ')
+ started = False
+ for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
+ if not started:
+ started = True
+ fromdate = '\t{}'.format(fromfiledate) if fromfiledate else ''
+ todate = '\t{}'.format(tofiledate) if tofiledate else ''
+ yield '*** {}{}{}'.format(fromfile, fromdate, lineterm)
+ yield '--- {}{}{}'.format(tofile, todate, lineterm)
+
+ first, last = group[0], group[-1]
+ yield '***************' + lineterm
+
+ file1_range = _format_range_context(first[1], last[2])
+ yield '*** {} ****{}'.format(file1_range, lineterm)
+
+ if any(tag in {'replace', 'delete'} for tag, _, _, _, _ in group):
+ for tag, i1, i2, _, _ in group:
+ if tag != 'insert':
+ for line in a[i1:i2]:
+ yield prefix[tag] + line
+
+ file2_range = _format_range_context(first[3], last[4])
+ yield '--- {} ----{}'.format(file2_range, lineterm)
+
+ if any(tag in {'replace', 'insert'} for tag, _, _, _, _ in group):
+ for tag, _, _, j1, j2 in group:
+ if tag != 'delete':
+ for line in b[j1:j2]:
+ yield prefix[tag] + line
+
+def _check_types(a, b, *args):
+ # Checking types is weird, but the alternative is garbled output when
+ # someone passes mixed bytes and str to {unified,context}_diff(). E.g.
+ # without this check, passing filenames as bytes results in output like
+ # --- b'oldfile.txt'
+ # +++ b'newfile.txt'
+ # because of how str.format() incorporates bytes objects.
+ if a and not isinstance(a[0], str):
+ raise TypeError('lines to compare must be str, not %s (%r)' %
+ (type(a[0]).__name__, a[0]))
+ if b and not isinstance(b[0], str):
+ raise TypeError('lines to compare must be str, not %s (%r)' %
+ (type(b[0]).__name__, b[0]))
+ for arg in args:
+ if not isinstance(arg, str):
+ raise TypeError('all arguments must be str, not: %r' % (arg,))
+
+def diff_bytes(dfunc, a, b, fromfile=b'', tofile=b'',
+ fromfiledate=b'', tofiledate=b'', n=3, lineterm=b'\n'):
+ r"""
+ Compare `a` and `b`, two sequences of lines represented as bytes rather
+ than str. This is a wrapper for `dfunc`, which is typically either
+ unified_diff() or context_diff(). Inputs are losslessly converted to
+ strings so that `dfunc` only has to worry about strings, and encoded
+ back to bytes on return. This is necessary to compare files with
+ unknown or inconsistent encoding. All other inputs (except `n`) must be
+ bytes rather than str.
+ """
+ def decode(s):
+ try:
+ return s.decode('ascii', 'surrogateescape')
+ except AttributeError as err:
+ msg = ('all arguments must be bytes, not %s (%r)' %
+ (type(s).__name__, s))
+ raise TypeError(msg) from err
+ a = list(map(decode, a))
+ b = list(map(decode, b))
+ fromfile = decode(fromfile)
+ tofile = decode(tofile)
+ fromfiledate = decode(fromfiledate)
+ tofiledate = decode(tofiledate)
+ lineterm = decode(lineterm)
+
+ lines = dfunc(a, b, fromfile, tofile, fromfiledate, tofiledate, n, lineterm)
+ for line in lines:
+ yield line.encode('ascii', 'surrogateescape')
+
+def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK):
+ r"""
+ Compare `a` and `b` (lists of strings); return a `Differ`-style delta.
+
+ Optional keyword parameters `linejunk` and `charjunk` are for filter
+ functions, or can be None:
+
+ - linejunk: A function that should accept a single string argument and
+ return true iff the string is junk. The default is None, and is
+ recommended; the underlying SequenceMatcher class has an adaptive
+ notion of "noise" lines.
+
+ - charjunk: A function that accepts a character (string of length
+ 1), and returns true iff the character is junk. The default is
+ the module-level function IS_CHARACTER_JUNK, which filters out
+ whitespace characters (a blank or tab; note: it's a bad idea to
+ include newline in this!).
+
+ Tools/scripts/ndiff.py is a command-line front-end to this function.
+
+ Example:
+
+ >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(keepends=True),
+ ... 'ore\ntree\nemu\n'.splitlines(keepends=True))
+ >>> print(''.join(diff), end="")
+ - one
+ ? ^
+ + ore
+ ? ^
+ - two
+ - three
+ ? -
+ + tree
+ + emu
+ """
+ return Differ(linejunk, charjunk).compare(a, b)
+
+def _mdiff(fromlines, tolines, context=None, linejunk=None,
+ charjunk=IS_CHARACTER_JUNK):
+ r"""Returns generator yielding marked up from/to side by side differences.
+
+ Arguments:
+ fromlines -- list of text lines to compared to tolines
+ tolines -- list of text lines to be compared to fromlines
+ context -- number of context lines to display on each side of difference,
+ if None, all from/to text lines will be generated.
+ linejunk -- passed on to ndiff (see ndiff documentation)
+ charjunk -- passed on to ndiff (see ndiff documentation)
+
+ This function returns an iterator which returns a tuple:
+ (from line tuple, to line tuple, boolean flag)
+
+ from/to line tuple -- (line num, line text)
+ line num -- integer or None (to indicate a context separation)
+ line text -- original line text with following markers inserted:
+ '\0+' -- marks start of added text
+ '\0-' -- marks start of deleted text
+ '\0^' -- marks start of changed text
+ '\1' -- marks end of added/deleted/changed text
+
+ boolean flag -- None indicates context separation, True indicates
+ either "from" or "to" line contains a change, otherwise False.
+
+ This function/iterator was originally developed to generate side by side
+ file difference for making HTML pages (see HtmlDiff class for example
+ usage).
+
+ Note, this function utilizes the ndiff function to generate the side by
+ side difference markup. Optional ndiff arguments may be passed to this
+ function and they in turn will be passed to ndiff.
+ """
+ import re
+
+ # regular expression for finding intraline change indices
+ change_re = re.compile(r'(\++|\-+|\^+)')
+
+ # create the difference iterator to generate the differences
+ diff_lines_iterator = ndiff(fromlines,tolines,linejunk,charjunk)
+
+ def _make_line(lines, format_key, side, num_lines=[0,0]):
+ """Returns line of text with user's change markup and line formatting.
+
+ lines -- list of lines from the ndiff generator to produce a line of
+ text from. When producing the line of text to return, the
+ lines used are removed from this list.
+ format_key -- '+' return first line in list with "add" markup around
+ the entire line.
+ '-' return first line in list with "delete" markup around
+ the entire line.
+ '?' return first line in list with add/delete/change
+ intraline markup (indices obtained from second line)
+ None return first line in list with no markup
+ side -- indice into the num_lines list (0=from,1=to)
+ num_lines -- from/to current line number. This is NOT intended to be a
+ passed parameter. It is present as a keyword argument to
+ maintain memory of the current line numbers between calls
+ of this function.
+
+ Note, this function is purposefully not defined at the module scope so
+ that data it needs from its parent function (within whose context it
+ is defined) does not need to be of module scope.
+ """
+ num_lines[side] += 1
+ # Handle case where no user markup is to be added, just return line of
+ # text with user's line format to allow for usage of the line number.
+ if format_key is None:
+ return (num_lines[side],lines.pop(0)[2:])
+ # Handle case of intraline changes
+ if format_key == '?':
+ text, markers = lines.pop(0), lines.pop(0)
+ # find intraline changes (store change type and indices in tuples)
+ sub_info = []
+ def record_sub_info(match_object,sub_info=sub_info):
+ sub_info.append([match_object.group(1)[0],match_object.span()])
+ return match_object.group(1)
+ change_re.sub(record_sub_info,markers)
+ # process each tuple inserting our special marks that won't be
+ # noticed by an xml/html escaper.
+ for key,(begin,end) in reversed(sub_info):
+ text = text[0:begin]+'\0'+key+text[begin:end]+'\1'+text[end:]
+ text = text[2:]
+ # Handle case of add/delete entire line
+ else:
+ text = lines.pop(0)[2:]
+ # if line of text is just a newline, insert a space so there is
+ # something for the user to highlight and see.
+ if not text:
+ text = ' '
+ # insert marks that won't be noticed by an xml/html escaper.
+ text = '\0' + format_key + text + '\1'
+ # Return line of text, first allow user's line formatter to do its
+ # thing (such as adding the line number) then replace the special
+ # marks with what the user's change markup.
+ return (num_lines[side],text)
+
+ def _line_iterator():
+ """Yields from/to lines of text with a change indication.
+
+ This function is an iterator. It itself pulls lines from a
+ differencing iterator, processes them and yields them. When it can
+ it yields both a "from" and a "to" line, otherwise it will yield one
+ or the other. In addition to yielding the lines of from/to text, a
+ boolean flag is yielded to indicate if the text line(s) have
+ differences in them.
+
+ Note, this function is purposefully not defined at the module scope so
+ that data it needs from its parent function (within whose context it
+ is defined) does not need to be of module scope.
+ """
+ lines = []
+ num_blanks_pending, num_blanks_to_yield = 0, 0
+ while True:
+ # Load up next 4 lines so we can look ahead, create strings which
+ # are a concatenation of the first character of each of the 4 lines
+ # so we can do some very readable comparisons.
+ while len(lines) < 4:
+ lines.append(next(diff_lines_iterator, 'X'))
+ s = ''.join([line[0] for line in lines])
+ if s.startswith('X'):
+ # When no more lines, pump out any remaining blank lines so the
+ # corresponding add/delete lines get a matching blank line so
+ # all line pairs get yielded at the next level.
+ num_blanks_to_yield = num_blanks_pending
+ elif s.startswith('-?+?'):
+ # simple intraline change
+ yield _make_line(lines,'?',0), _make_line(lines,'?',1), True
+ continue
+ elif s.startswith('--++'):
+ # in delete block, add block coming: we do NOT want to get
+ # caught up on blank lines yet, just process the delete line
+ num_blanks_pending -= 1
+ yield _make_line(lines,'-',0), None, True
+ continue
+ elif s.startswith(('--?+', '--+', '- ')):
+ # in delete block and see an intraline change or unchanged line
+ # coming: yield the delete line and then blanks
+ from_line,to_line = _make_line(lines,'-',0), None
+ num_blanks_to_yield,num_blanks_pending = num_blanks_pending-1,0
+ elif s.startswith('-+?'):
+ # intraline change
+ yield _make_line(lines,None,0), _make_line(lines,'?',1), True
+ continue
+ elif s.startswith('-?+'):
+ # intraline change
+ yield _make_line(lines,'?',0), _make_line(lines,None,1), True
+ continue
+ elif s.startswith('-'):
+ # delete FROM line
+ num_blanks_pending -= 1
+ yield _make_line(lines,'-',0), None, True
+ continue
+ elif s.startswith('+--'):
+ # in add block, delete block coming: we do NOT want to get
+ # caught up on blank lines yet, just process the add line
+ num_blanks_pending += 1
+ yield None, _make_line(lines,'+',1), True
+ continue
+ elif s.startswith(('+ ', '+-')):
+ # will be leaving an add block: yield blanks then add line
+ from_line, to_line = None, _make_line(lines,'+',1)
+ num_blanks_to_yield,num_blanks_pending = num_blanks_pending+1,0
+ elif s.startswith('+'):
+ # inside an add block, yield the add line
+ num_blanks_pending += 1
+ yield None, _make_line(lines,'+',1), True
+ continue
+ elif s.startswith(' '):
+ # unchanged text, yield it to both sides
+ yield _make_line(lines[:],None,0),_make_line(lines,None,1),False
+ continue
+ # Catch up on the blank lines so when we yield the next from/to
+ # pair, they are lined up.
+ while(num_blanks_to_yield < 0):
+ num_blanks_to_yield += 1
+ yield None,('','\n'),True
+ while(num_blanks_to_yield > 0):
+ num_blanks_to_yield -= 1
+ yield ('','\n'),None,True
+ if s.startswith('X'):
+ return
+ else:
+ yield from_line,to_line,True
+
+ def _line_pair_iterator():
+ """Yields from/to lines of text with a change indication.
+
+ This function is an iterator. It itself pulls lines from the line
+ iterator. Its difference from that iterator is that this function
+ always yields a pair of from/to text lines (with the change
+ indication). If necessary it will collect single from/to lines
+ until it has a matching pair from/to pair to yield.
+
+ Note, this function is purposefully not defined at the module scope so
+ that data it needs from its parent function (within whose context it
+ is defined) does not need to be of module scope.
+ """
+ line_iterator = _line_iterator()
+ fromlines,tolines=[],[]
+ while True:
+ # Collecting lines of text until we have a from/to pair
+ while (len(fromlines)==0 or len(tolines)==0):
+ try:
+ from_line, to_line, found_diff = next(line_iterator)
+ except StopIteration:
+ return
+ if from_line is not None:
+ fromlines.append((from_line,found_diff))
+ if to_line is not None:
+ tolines.append((to_line,found_diff))
+ # Once we have a pair, remove them from the collection and yield it
+ from_line, fromDiff = fromlines.pop(0)
+ to_line, to_diff = tolines.pop(0)
+ yield (from_line,to_line,fromDiff or to_diff)
+
+ # Handle case where user does not want context differencing, just yield
+ # them up without doing anything else with them.
+ line_pair_iterator = _line_pair_iterator()
+ if context is None:
+ yield from line_pair_iterator
+ # Handle case where user wants context differencing. We must do some
+ # storage of lines until we know for sure that they are to be yielded.
+ else:
+ context += 1
+ lines_to_write = 0
+ while True:
+ # Store lines up until we find a difference, note use of a
+ # circular queue because we only need to keep around what
+ # we need for context.
+ index, contextLines = 0, [None]*(context)
+ found_diff = False
+ while(found_diff is False):
+ try:
+ from_line, to_line, found_diff = next(line_pair_iterator)
+ except StopIteration:
+ return
+ i = index % context
+ contextLines[i] = (from_line, to_line, found_diff)
+ index += 1
+ # Yield lines that we have collected so far, but first yield
+ # the user's separator.
+ if index > context:
+ yield None, None, None
+ lines_to_write = context
+ else:
+ lines_to_write = index
+ index = 0
+ while(lines_to_write):
+ i = index % context
+ index += 1
+ yield contextLines[i]
+ lines_to_write -= 1
+ # Now yield the context lines after the change
+ lines_to_write = context-1
+ try:
+ while(lines_to_write):
+ from_line, to_line, found_diff = next(line_pair_iterator)
+ # If another change within the context, extend the context
+ if found_diff:
+ lines_to_write = context-1
+ else:
+ lines_to_write -= 1
+ yield from_line, to_line, found_diff
+ except StopIteration:
+ # Catch exception from next() and return normally
+ return
+
+
+_file_template = """
+
+
+
+
+
+
+
+
+
+
+
+ %(table)s%(legend)s
+
+
+"""
+
+_styles = """
+ table.diff {font-family:Courier; border:medium;}
+ .diff_header {background-color:#e0e0e0}
+ td.diff_header {text-align:right}
+ .diff_next {background-color:#c0c0c0}
+ .diff_add {background-color:#aaffaa}
+ .diff_chg {background-color:#ffff77}
+ .diff_sub {background-color:#ffaaaa}"""
+
+_table_template = """
+
+
+
+ %(header_row)s
+
+%(data_rows)s
+
"""
+
+_legend = """
+
+ Legends
+
+ Colors
+ Added
+ Changed
+ Deleted
+
+
+ Links
+ (f)irst change
+ (n)ext change
+ (t)op
+
+
"""
+
+class HtmlDiff(object):
+ """For producing HTML side by side comparison with change highlights.
+
+ This class can be used to create an HTML table (or a complete HTML file
+ containing the table) showing a side by side, line by line comparison
+ of text with inter-line and intra-line change highlights. The table can
+ be generated in either full or contextual difference mode.
+
+ The following methods are provided for HTML generation:
+
+ make_table -- generates HTML for a single side by side table
+ make_file -- generates complete HTML file with a single side by side table
+
+ See tools/scripts/diff.py for an example usage of this class.
+ """
+
+ _file_template = _file_template
+ _styles = _styles
+ _table_template = _table_template
+ _legend = _legend
+ _default_prefix = 0
+
+ def __init__(self,tabsize=8,wrapcolumn=None,linejunk=None,
+ charjunk=IS_CHARACTER_JUNK):
+ """HtmlDiff instance initializer
+
+ Arguments:
+ tabsize -- tab stop spacing, defaults to 8.
+ wrapcolumn -- column number where lines are broken and wrapped,
+ defaults to None where lines are not wrapped.
+ linejunk,charjunk -- keyword arguments passed into ndiff() (used by
+ HtmlDiff() to generate the side by side HTML differences). See
+ ndiff() documentation for argument default values and descriptions.
+ """
+ self._tabsize = tabsize
+ self._wrapcolumn = wrapcolumn
+ self._linejunk = linejunk
+ self._charjunk = charjunk
+
+ def make_file(self, fromlines, tolines, fromdesc='', todesc='',
+ context=False, numlines=5, *, charset='utf-8'):
+ """Returns HTML file of side by side comparison with change highlights
+
+ Arguments:
+ fromlines -- list of "from" lines
+ tolines -- list of "to" lines
+ fromdesc -- "from" file column header string
+ todesc -- "to" file column header string
+ context -- set to True for contextual differences (defaults to False
+ which shows full differences).
+ numlines -- number of context lines. When context is set True,
+ controls number of lines displayed before and after the change.
+ When context is False, controls the number of lines to place
+ the "next" link anchors before the next change (so click of
+ "next" link jumps to just before the change).
+ charset -- charset of the HTML document
+ """
+
+ return (self._file_template % dict(
+ styles=self._styles,
+ legend=self._legend,
+ table=self.make_table(fromlines, tolines, fromdesc, todesc,
+ context=context, numlines=numlines),
+ charset=charset
+ )).encode(charset, 'xmlcharrefreplace').decode(charset)
+
+ def _tab_newline_replace(self,fromlines,tolines):
+ """Returns from/to line lists with tabs expanded and newlines removed.
+
+ Instead of tab characters being replaced by the number of spaces
+ needed to fill in to the next tab stop, this function will fill
+ the space with tab characters. This is done so that the difference
+ algorithms can identify changes in a file when tabs are replaced by
+ spaces and vice versa. At the end of the HTML generation, the tab
+ characters will be replaced with a nonbreakable space.
+ """
+ def expand_tabs(line):
+ # hide real spaces
+ line = line.replace(' ','\0')
+ # expand tabs into spaces
+ line = line.expandtabs(self._tabsize)
+ # replace spaces from expanded tabs back into tab characters
+ # (we'll replace them with markup after we do differencing)
+ line = line.replace(' ','\t')
+ return line.replace('\0',' ').rstrip('\n')
+ fromlines = [expand_tabs(line) for line in fromlines]
+ tolines = [expand_tabs(line) for line in tolines]
+ return fromlines,tolines
+
+ def _split_line(self,data_list,line_num,text):
+ """Builds list of text lines by splitting text lines at wrap point
+
+ This function will determine if the input text line needs to be
+ wrapped (split) into separate lines. If so, the first wrap point
+ will be determined and the first line appended to the output
+ text line list. This function is used recursively to handle
+ the second part of the split line to further split it.
+ """
+ # if blank line or context separator, just add it to the output list
+ if not line_num:
+ data_list.append((line_num,text))
+ return
+
+ # if line text doesn't need wrapping, just add it to the output list
+ size = len(text)
+ max = self._wrapcolumn
+ if (size <= max) or ((size -(text.count('\0')*3)) <= max):
+ data_list.append((line_num,text))
+ return
+
+ # scan text looking for the wrap point, keeping track if the wrap
+ # point is inside markers
+ i = 0
+ n = 0
+ mark = ''
+ while n < max and i < size:
+ if text[i] == '\0':
+ i += 1
+ mark = text[i]
+ i += 1
+ elif text[i] == '\1':
+ i += 1
+ mark = ''
+ else:
+ i += 1
+ n += 1
+
+ # wrap point is inside text, break it up into separate lines
+ line1 = text[:i]
+ line2 = text[i:]
+
+ # if wrap point is inside markers, place end marker at end of first
+ # line and start marker at beginning of second line because each
+ # line will have its own table tag markup around it.
+ if mark:
+ line1 = line1 + '\1'
+ line2 = '\0' + mark + line2
+
+ # tack on first line onto the output list
+ data_list.append((line_num,line1))
+
+ # use this routine again to wrap the remaining text
+ self._split_line(data_list,'>',line2)
+
+ def _line_wrapper(self,diffs):
+ """Returns iterator that splits (wraps) mdiff text lines"""
+
+ # pull from/to data and flags from mdiff iterator
+ for fromdata,todata,flag in diffs:
+ # check for context separators and pass them through
+ if flag is None:
+ yield fromdata,todata,flag
+ continue
+ (fromline,fromtext),(toline,totext) = fromdata,todata
+ # for each from/to line split it at the wrap column to form
+ # list of text lines.
+ fromlist,tolist = [],[]
+ self._split_line(fromlist,fromline,fromtext)
+ self._split_line(tolist,toline,totext)
+ # yield from/to line in pairs inserting blank lines as
+ # necessary when one side has more wrapped lines
+ while fromlist or tolist:
+ if fromlist:
+ fromdata = fromlist.pop(0)
+ else:
+ fromdata = ('',' ')
+ if tolist:
+ todata = tolist.pop(0)
+ else:
+ todata = ('',' ')
+ yield fromdata,todata,flag
+
+ def _collect_lines(self,diffs):
+ """Collects mdiff output into separate lists
+
+ Before storing the mdiff from/to data into a list, it is converted
+ into a single line of text with HTML markup.
+ """
+
+ fromlist,tolist,flaglist = [],[],[]
+ # pull from/to data and flags from mdiff style iterator
+ for fromdata,todata,flag in diffs:
+ try:
+ # store HTML markup of the lines into the lists
+ fromlist.append(self._format_line(0,flag,*fromdata))
+ tolist.append(self._format_line(1,flag,*todata))
+ except TypeError:
+ # exceptions occur for lines where context separators go
+ fromlist.append(None)
+ tolist.append(None)
+ flaglist.append(flag)
+ return fromlist,tolist,flaglist
+
+ def _format_line(self,side,flag,linenum,text):
+ """Returns HTML markup of "from" / "to" text lines
+
+ side -- 0 or 1 indicating "from" or "to" text
+ flag -- indicates if difference on line
+ linenum -- line number (used for line number column)
+ text -- line text to be marked up
+ """
+ try:
+ linenum = '%d' % linenum
+ id = ' id="%s%s"' % (self._prefix[side],linenum)
+ except TypeError:
+ # handle blank lines where linenum is '>' or ''
+ id = ''
+ # replace those things that would get confused with HTML symbols
+ text=text.replace("&","&").replace(">",">").replace("<","<")
+
+ # make space non-breakable so they don't get compressed or line wrapped
+ text = text.replace(' ',' ').rstrip()
+
+ return '%s ' \
+ % (id,linenum,text)
+
+ def _make_prefix(self):
+ """Create unique anchor prefixes"""
+
+ # Generate a unique anchor prefix so multiple tables
+ # can exist on the same HTML page without conflicts.
+ fromprefix = "from%d_" % HtmlDiff._default_prefix
+ toprefix = "to%d_" % HtmlDiff._default_prefix
+ HtmlDiff._default_prefix += 1
+ # store prefixes so line format method has access
+ self._prefix = [fromprefix,toprefix]
+
+ def _convert_flags(self,fromlist,tolist,flaglist,context,numlines):
+ """Makes list of "next" links"""
+
+ # all anchor names will be generated using the unique "to" prefix
+ toprefix = self._prefix[1]
+
+ # process change flags, generating middle column of next anchors/links
+ next_id = ['']*len(flaglist)
+ next_href = ['']*len(flaglist)
+ num_chg, in_change = 0, False
+ last = 0
+ for i,flag in enumerate(flaglist):
+ if flag:
+ if not in_change:
+ in_change = True
+ last = i
+ # at the beginning of a change, drop an anchor a few lines
+ # (the context lines) before the change for the previous
+ # link
+ i = max([0,i-numlines])
+ next_id[i] = ' id="difflib_chg_%s_%d"' % (toprefix,num_chg)
+ # at the beginning of a change, drop a link to the next
+ # change
+ num_chg += 1
+ next_href[last] = 'n ' % (
+ toprefix,num_chg)
+ else:
+ in_change = False
+ # check for cases where there is no content to avoid exceptions
+ if not flaglist:
+ flaglist = [False]
+ next_id = ['']
+ next_href = ['']
+ last = 0
+ if context:
+ fromlist = [' No Differences Found ']
+ tolist = fromlist
+ else:
+ fromlist = tolist = [' Empty File ']
+ # if not a change on first line, drop a link
+ if not flaglist[0]:
+ next_href[0] = 'f ' % toprefix
+ # redo the last link to link to the top
+ next_href[last] = 't ' % (toprefix)
+
+ return fromlist,tolist,flaglist,next_href,next_id
+
+ def make_table(self,fromlines,tolines,fromdesc='',todesc='',context=False,
+ numlines=5):
+ """Returns HTML table of side by side comparison with change highlights
+
+ Arguments:
+ fromlines -- list of "from" lines
+ tolines -- list of "to" lines
+ fromdesc -- "from" file column header string
+ todesc -- "to" file column header string
+ context -- set to True for contextual differences (defaults to False
+ which shows full differences).
+ numlines -- number of context lines. When context is set True,
+ controls number of lines displayed before and after the change.
+ When context is False, controls the number of lines to place
+ the "next" link anchors before the next change (so click of
+ "next" link jumps to just before the change).
+ """
+
+ # make unique anchor prefixes so that multiple tables may exist
+ # on the same page without conflict.
+ self._make_prefix()
+
+ # change tabs to spaces before it gets more difficult after we insert
+ # markup
+ fromlines,tolines = self._tab_newline_replace(fromlines,tolines)
+
+ # create diffs iterator which generates side by side from/to data
+ if context:
+ context_lines = numlines
+ else:
+ context_lines = None
+ diffs = _mdiff(fromlines,tolines,context_lines,linejunk=self._linejunk,
+ charjunk=self._charjunk)
+
+ # set up iterator to wrap lines that exceed desired width
+ if self._wrapcolumn:
+ diffs = self._line_wrapper(diffs)
+
+ # collect up from/to lines and flags into lists (also format the lines)
+ fromlist,tolist,flaglist = self._collect_lines(diffs)
+
+ # process change flags, generating middle column of next anchors/links
+ fromlist,tolist,flaglist,next_href,next_id = self._convert_flags(
+ fromlist,tolist,flaglist,context,numlines)
+
+ s = []
+ fmt = ' %s %s' + \
+ '%s %s \n'
+ for i in range(len(flaglist)):
+ if flaglist[i] is None:
+ # mdiff yields None on separator lines skip the bogus ones
+ # generated for the first line
+ if i > 0:
+ s.append(' \n \n')
+ else:
+ s.append( fmt % (next_id[i],next_href[i],fromlist[i],
+ next_href[i],tolist[i]))
+ if fromdesc or todesc:
+ header_row = '%s%s%s%s ' % (
+ ' ',
+ '' % fromdesc,
+ ' ',
+ '' % todesc)
+ else:
+ header_row = ''
+
+ table = self._table_template % dict(
+ data_rows=''.join(s),
+ header_row=header_row,
+ prefix=self._prefix[1])
+
+ return table.replace('\0+',''). \
+ replace('\0-',''). \
+ replace('\0^',''). \
+ replace('\1',' '). \
+ replace('\t',' ')
+
+del re
+
+def restore(delta, which):
+ r"""
+ Generate one of the two sequences that generated a delta.
+
+ Given a `delta` produced by `Differ.compare()` or `ndiff()`, extract
+ lines originating from file 1 or 2 (parameter `which`), stripping off line
+ prefixes.
+
+ Examples:
+
+ >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(keepends=True),
+ ... 'ore\ntree\nemu\n'.splitlines(keepends=True))
+ >>> diff = list(diff)
+ >>> print(''.join(restore(diff, 1)), end="")
+ one
+ two
+ three
+ >>> print(''.join(restore(diff, 2)), end="")
+ ore
+ tree
+ emu
+ """
+ try:
+ tag = {1: "- ", 2: "+ "}[int(which)]
+ except KeyError:
+ raise ValueError('unknown delta choice (must be 1 or 2): %r'
+ % which) from None
+ prefixes = (" ", tag)
+ for line in delta:
+ if line[:2] in prefixes:
+ yield line[2:]
+
+def _test():
+ import doctest, difflib
+ return doctest.testmod(difflib)
+
+if __name__ == "__main__":
+ _test()
diff --git a/dist/lib/dis.py b/dist/lib/dis.py
new file mode 100644
index 0000000..10e5f7f
--- /dev/null
+++ b/dist/lib/dis.py
@@ -0,0 +1,553 @@
+"""Disassembler of Python byte code into mnemonics."""
+
+import sys
+import types
+import collections
+import io
+
+from opcode import *
+from opcode import __all__ as _opcodes_all
+
+__all__ = ["code_info", "dis", "disassemble", "distb", "disco",
+ "findlinestarts", "findlabels", "show_code",
+ "get_instructions", "Instruction", "Bytecode"] + _opcodes_all
+del _opcodes_all
+
+_have_code = (types.MethodType, types.FunctionType, types.CodeType,
+ classmethod, staticmethod, type)
+
+FORMAT_VALUE = opmap['FORMAT_VALUE']
+FORMAT_VALUE_CONVERTERS = (
+ (None, ''),
+ (str, 'str'),
+ (repr, 'repr'),
+ (ascii, 'ascii'),
+)
+MAKE_FUNCTION = opmap['MAKE_FUNCTION']
+MAKE_FUNCTION_FLAGS = ('defaults', 'kwdefaults', 'annotations', 'closure')
+
+
+def _try_compile(source, name):
+ """Attempts to compile the given source, first as an expression and
+ then as a statement if the first approach fails.
+
+ Utility function to accept strings in functions that otherwise
+ expect code objects
+ """
+ try:
+ c = compile(source, name, 'eval')
+ except SyntaxError:
+ c = compile(source, name, 'exec')
+ return c
+
+def dis(x=None, *, file=None, depth=None):
+ """Disassemble classes, methods, functions, and other compiled objects.
+
+ With no argument, disassemble the last traceback.
+
+ Compiled objects currently include generator objects, async generator
+ objects, and coroutine objects, all of which store their code object
+ in a special attribute.
+ """
+ if x is None:
+ distb(file=file)
+ return
+ # Extract functions from methods.
+ if hasattr(x, '__func__'):
+ x = x.__func__
+ # Extract compiled code objects from...
+ if hasattr(x, '__code__'): # ...a function, or
+ x = x.__code__
+ elif hasattr(x, 'gi_code'): #...a generator object, or
+ x = x.gi_code
+ elif hasattr(x, 'ag_code'): #...an asynchronous generator object, or
+ x = x.ag_code
+ elif hasattr(x, 'cr_code'): #...a coroutine.
+ x = x.cr_code
+ # Perform the disassembly.
+ if hasattr(x, '__dict__'): # Class or module
+ items = sorted(x.__dict__.items())
+ for name, x1 in items:
+ if isinstance(x1, _have_code):
+ print("Disassembly of %s:" % name, file=file)
+ try:
+ dis(x1, file=file, depth=depth)
+ except TypeError as msg:
+ print("Sorry:", msg, file=file)
+ print(file=file)
+ elif hasattr(x, 'co_code'): # Code object
+ _disassemble_recursive(x, file=file, depth=depth)
+ elif isinstance(x, (bytes, bytearray)): # Raw bytecode
+ _disassemble_bytes(x, file=file)
+ elif isinstance(x, str): # Source code
+ _disassemble_str(x, file=file, depth=depth)
+ else:
+ raise TypeError("don't know how to disassemble %s objects" %
+ type(x).__name__)
+
+def distb(tb=None, *, file=None):
+ """Disassemble a traceback (default: last traceback)."""
+ if tb is None:
+ try:
+ tb = sys.last_traceback
+ except AttributeError:
+ raise RuntimeError("no last traceback to disassemble") from None
+ while tb.tb_next: tb = tb.tb_next
+ disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file)
+
+# The inspect module interrogates this dictionary to build its
+# list of CO_* constants. It is also used by pretty_flags to
+# turn the co_flags field into a human readable list.
+COMPILER_FLAG_NAMES = {
+ 1: "OPTIMIZED",
+ 2: "NEWLOCALS",
+ 4: "VARARGS",
+ 8: "VARKEYWORDS",
+ 16: "NESTED",
+ 32: "GENERATOR",
+ 64: "NOFREE",
+ 128: "COROUTINE",
+ 256: "ITERABLE_COROUTINE",
+ 512: "ASYNC_GENERATOR",
+}
+
+def pretty_flags(flags):
+ """Return pretty representation of code flags."""
+ names = []
+ for i in range(32):
+ flag = 1<")
+ # By now, if we don't have a code object, we can't disassemble x.
+ if hasattr(x, 'co_code'):
+ return x
+ raise TypeError("don't know how to disassemble %s objects" %
+ type(x).__name__)
+
+def code_info(x):
+ """Formatted details of methods, functions, or code."""
+ return _format_code_info(_get_code_object(x))
+
+def _format_code_info(co):
+ lines = []
+ lines.append("Name: %s" % co.co_name)
+ lines.append("Filename: %s" % co.co_filename)
+ lines.append("Argument count: %s" % co.co_argcount)
+ lines.append("Positional-only arguments: %s" % co.co_posonlyargcount)
+ lines.append("Kw-only arguments: %s" % co.co_kwonlyargcount)
+ lines.append("Number of locals: %s" % co.co_nlocals)
+ lines.append("Stack size: %s" % co.co_stacksize)
+ lines.append("Flags: %s" % pretty_flags(co.co_flags))
+ if co.co_consts:
+ lines.append("Constants:")
+ for i_c in enumerate(co.co_consts):
+ lines.append("%4d: %r" % i_c)
+ if co.co_names:
+ lines.append("Names:")
+ for i_n in enumerate(co.co_names):
+ lines.append("%4d: %s" % i_n)
+ if co.co_varnames:
+ lines.append("Variable names:")
+ for i_n in enumerate(co.co_varnames):
+ lines.append("%4d: %s" % i_n)
+ if co.co_freevars:
+ lines.append("Free variables:")
+ for i_n in enumerate(co.co_freevars):
+ lines.append("%4d: %s" % i_n)
+ if co.co_cellvars:
+ lines.append("Cell variables:")
+ for i_n in enumerate(co.co_cellvars):
+ lines.append("%4d: %s" % i_n)
+ return "\n".join(lines)
+
+def show_code(co, *, file=None):
+ """Print details of methods, functions, or code to *file*.
+
+ If *file* is not provided, the output is printed on stdout.
+ """
+ print(code_info(co), file=file)
+
+_Instruction = collections.namedtuple("_Instruction",
+ "opname opcode arg argval argrepr offset starts_line is_jump_target")
+
+_Instruction.opname.__doc__ = "Human readable name for operation"
+_Instruction.opcode.__doc__ = "Numeric code for operation"
+_Instruction.arg.__doc__ = "Numeric argument to operation (if any), otherwise None"
+_Instruction.argval.__doc__ = "Resolved arg value (if known), otherwise same as arg"
+_Instruction.argrepr.__doc__ = "Human readable description of operation argument"
+_Instruction.offset.__doc__ = "Start index of operation within bytecode sequence"
+_Instruction.starts_line.__doc__ = "Line started by this opcode (if any), otherwise None"
+_Instruction.is_jump_target.__doc__ = "True if other code jumps to here, otherwise False"
+
+_OPNAME_WIDTH = 20
+_OPARG_WIDTH = 5
+
+class Instruction(_Instruction):
+ """Details for a bytecode operation
+
+ Defined fields:
+ opname - human readable name for operation
+ opcode - numeric code for operation
+ arg - numeric argument to operation (if any), otherwise None
+ argval - resolved arg value (if known), otherwise same as arg
+ argrepr - human readable description of operation argument
+ offset - start index of operation within bytecode sequence
+ starts_line - line started by this opcode (if any), otherwise None
+ is_jump_target - True if other code jumps to here, otherwise False
+ """
+
+ def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4):
+ """Format instruction details for inclusion in disassembly output
+
+ *lineno_width* sets the width of the line number field (0 omits it)
+ *mark_as_current* inserts a '-->' marker arrow as part of the line
+ *offset_width* sets the width of the instruction offset field
+ """
+ fields = []
+ # Column: Source code line number
+ if lineno_width:
+ if self.starts_line is not None:
+ lineno_fmt = "%%%dd" % lineno_width
+ fields.append(lineno_fmt % self.starts_line)
+ else:
+ fields.append(' ' * lineno_width)
+ # Column: Current instruction indicator
+ if mark_as_current:
+ fields.append('-->')
+ else:
+ fields.append(' ')
+ # Column: Jump target marker
+ if self.is_jump_target:
+ fields.append('>>')
+ else:
+ fields.append(' ')
+ # Column: Instruction offset from start of code sequence
+ fields.append(repr(self.offset).rjust(offset_width))
+ # Column: Opcode name
+ fields.append(self.opname.ljust(_OPNAME_WIDTH))
+ # Column: Opcode argument
+ if self.arg is not None:
+ fields.append(repr(self.arg).rjust(_OPARG_WIDTH))
+ # Column: Opcode argument details
+ if self.argrepr:
+ fields.append('(' + self.argrepr + ')')
+ return ' '.join(fields).rstrip()
+
+
+def get_instructions(x, *, first_line=None):
+ """Iterator for the opcodes in methods, functions or code
+
+ Generates a series of Instruction named tuples giving the details of
+ each operations in the supplied code.
+
+ If *first_line* is not None, it indicates the line number that should
+ be reported for the first source line in the disassembled code.
+ Otherwise, the source line information (if any) is taken directly from
+ the disassembled code object.
+ """
+ co = _get_code_object(x)
+ cell_names = co.co_cellvars + co.co_freevars
+ linestarts = dict(findlinestarts(co))
+ if first_line is not None:
+ line_offset = first_line - co.co_firstlineno
+ else:
+ line_offset = 0
+ return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
+ co.co_consts, cell_names, linestarts,
+ line_offset)
+
+def _get_const_info(const_index, const_list):
+ """Helper to get optional details about const references
+
+ Returns the dereferenced constant and its repr if the constant
+ list is defined.
+ Otherwise returns the constant index and its repr().
+ """
+ argval = const_index
+ if const_list is not None:
+ argval = const_list[const_index]
+ return argval, repr(argval)
+
+def _get_name_info(name_index, name_list):
+ """Helper to get optional details about named references
+
+ Returns the dereferenced name as both value and repr if the name
+ list is defined.
+ Otherwise returns the name index and its repr().
+ """
+ argval = name_index
+ if name_list is not None:
+ argval = name_list[name_index]
+ argrepr = argval
+ else:
+ argrepr = repr(argval)
+ return argval, argrepr
+
+
+def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
+ cells=None, linestarts=None, line_offset=0):
+ """Iterate over the instructions in a bytecode string.
+
+ Generates a sequence of Instruction namedtuples giving the details of each
+ opcode. Additional information about the code's runtime environment
+ (e.g. variable names, constants) can be specified using optional
+ arguments.
+
+ """
+ labels = findlabels(code)
+ starts_line = None
+ for offset, op, arg in _unpack_opargs(code):
+ if linestarts is not None:
+ starts_line = linestarts.get(offset, None)
+ if starts_line is not None:
+ starts_line += line_offset
+ is_jump_target = offset in labels
+ argval = None
+ argrepr = ''
+ if arg is not None:
+ # Set argval to the dereferenced value of the argument when
+ # available, and argrepr to the string representation of argval.
+ # _disassemble_bytes needs the string repr of the
+ # raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
+ argval = arg
+ if op in hasconst:
+ argval, argrepr = _get_const_info(arg, constants)
+ elif op in hasname:
+ argval, argrepr = _get_name_info(arg, names)
+ elif op in hasjrel:
+ argval = offset + 2 + arg
+ argrepr = "to " + repr(argval)
+ elif op in haslocal:
+ argval, argrepr = _get_name_info(arg, varnames)
+ elif op in hascompare:
+ argval = cmp_op[arg]
+ argrepr = argval
+ elif op in hasfree:
+ argval, argrepr = _get_name_info(arg, cells)
+ elif op == FORMAT_VALUE:
+ argval, argrepr = FORMAT_VALUE_CONVERTERS[arg & 0x3]
+ argval = (argval, bool(arg & 0x4))
+ if argval[1]:
+ if argrepr:
+ argrepr += ', '
+ argrepr += 'with format'
+ elif op == MAKE_FUNCTION:
+ argrepr = ', '.join(s for i, s in enumerate(MAKE_FUNCTION_FLAGS)
+ if arg & (1< 0:
+ if depth is not None:
+ depth = depth - 1
+ for x in co.co_consts:
+ if hasattr(x, 'co_code'):
+ print(file=file)
+ print("Disassembly of %r:" % (x,), file=file)
+ _disassemble_recursive(x, file=file, depth=depth)
+
+def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
+ constants=None, cells=None, linestarts=None,
+ *, file=None, line_offset=0):
+ # Omit the line number column entirely if we have no line number info
+ show_lineno = linestarts is not None
+ if show_lineno:
+ maxlineno = max(linestarts.values()) + line_offset
+ if maxlineno >= 1000:
+ lineno_width = len(str(maxlineno))
+ else:
+ lineno_width = 3
+ else:
+ lineno_width = 0
+ maxoffset = len(code) - 2
+ if maxoffset >= 10000:
+ offset_width = len(str(maxoffset))
+ else:
+ offset_width = 4
+ for instr in _get_instructions_bytes(code, varnames, names,
+ constants, cells, linestarts,
+ line_offset=line_offset):
+ new_source_line = (show_lineno and
+ instr.starts_line is not None and
+ instr.offset > 0)
+ if new_source_line:
+ print(file=file)
+ is_current_instr = instr.offset == lasti
+ print(instr._disassemble(lineno_width, is_current_instr, offset_width),
+ file=file)
+
+def _disassemble_str(source, **kwargs):
+ """Compile the source string, then disassemble the code object."""
+ _disassemble_recursive(_try_compile(source, ''), **kwargs)
+
+disco = disassemble # XXX For backwards compatibility
+
+def _unpack_opargs(code):
+ extended_arg = 0
+ for i in range(0, len(code), 2):
+ op = code[i]
+ if op >= HAVE_ARGUMENT:
+ arg = code[i+1] | extended_arg
+ extended_arg = (arg << 8) if op == EXTENDED_ARG else 0
+ else:
+ arg = None
+ yield (i, op, arg)
+
+def findlabels(code):
+ """Detect all offsets in a byte code which are jump targets.
+
+ Return the list of offsets.
+
+ """
+ labels = []
+ for offset, op, arg in _unpack_opargs(code):
+ if arg is not None:
+ if op in hasjrel:
+ label = offset + 2 + arg
+ elif op in hasjabs:
+ label = arg
+ else:
+ continue
+ if label not in labels:
+ labels.append(label)
+ return labels
+
+def findlinestarts(code):
+ """Find the offsets in a byte code which are start of lines in the source.
+
+ Generate pairs (offset, lineno) as described in Python/compile.c.
+
+ """
+ byte_increments = code.co_lnotab[0::2]
+ line_increments = code.co_lnotab[1::2]
+ bytecode_len = len(code.co_code)
+
+ lastlineno = None
+ lineno = code.co_firstlineno
+ addr = 0
+ for byte_incr, line_incr in zip(byte_increments, line_increments):
+ if byte_incr:
+ if lineno != lastlineno:
+ yield (addr, lineno)
+ lastlineno = lineno
+ addr += byte_incr
+ if addr >= bytecode_len:
+ # The rest of the lnotab byte offsets are past the end of
+ # the bytecode, so the lines were optimized away.
+ return
+ if line_incr >= 0x80:
+ # line_increments is an array of 8-bit signed integers
+ line_incr -= 0x100
+ lineno += line_incr
+ if lineno != lastlineno:
+ yield (addr, lineno)
+
+class Bytecode:
+ """The bytecode operations of a piece of code
+
+ Instantiate this with a function, method, other compiled object, string of
+ code, or a code object (as returned by compile()).
+
+ Iterating over this yields the bytecode operations as Instruction instances.
+ """
+ def __init__(self, x, *, first_line=None, current_offset=None):
+ self.codeobj = co = _get_code_object(x)
+ if first_line is None:
+ self.first_line = co.co_firstlineno
+ self._line_offset = 0
+ else:
+ self.first_line = first_line
+ self._line_offset = first_line - co.co_firstlineno
+ self._cell_names = co.co_cellvars + co.co_freevars
+ self._linestarts = dict(findlinestarts(co))
+ self._original_object = x
+ self.current_offset = current_offset
+
+ def __iter__(self):
+ co = self.codeobj
+ return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
+ co.co_consts, self._cell_names,
+ self._linestarts,
+ line_offset=self._line_offset)
+
+ def __repr__(self):
+ return "{}({!r})".format(self.__class__.__name__,
+ self._original_object)
+
+ @classmethod
+ def from_traceback(cls, tb):
+ """ Construct a Bytecode from the given traceback """
+ while tb.tb_next:
+ tb = tb.tb_next
+ return cls(tb.tb_frame.f_code, current_offset=tb.tb_lasti)
+
+ def info(self):
+ """Return formatted information about the code object."""
+ return _format_code_info(self.codeobj)
+
+ def dis(self):
+ """Return a formatted view of the bytecode operations."""
+ co = self.codeobj
+ if self.current_offset is not None:
+ offset = self.current_offset
+ else:
+ offset = -1
+ with io.StringIO() as output:
+ _disassemble_bytes(co.co_code, varnames=co.co_varnames,
+ names=co.co_names, constants=co.co_consts,
+ cells=self._cell_names,
+ linestarts=self._linestarts,
+ line_offset=self._line_offset,
+ file=output,
+ lasti=offset)
+ return output.getvalue()
+
+
+def _test():
+ """Simple test program to disassemble a file."""
+ import argparse
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument('infile', type=argparse.FileType(), nargs='?', default='-')
+ args = parser.parse_args()
+ with args.infile as infile:
+ source = infile.read()
+ code = compile(source, args.infile.name, "exec")
+ dis(code)
+
+if __name__ == "__main__":
+ _test()
diff --git a/dist/lib/doctest.py b/dist/lib/doctest.py
new file mode 100644
index 0000000..ee71984
--- /dev/null
+++ b/dist/lib/doctest.py
@@ -0,0 +1,2793 @@
+# Module doctest.
+# Released to the public domain 16-Jan-2001, by Tim Peters (tim@python.org).
+# Major enhancements and refactoring by:
+# Jim Fulton
+# Edward Loper
+
+# Provided as-is; use at your own risk; no warranty; no promises; enjoy!
+
+r"""Module doctest -- a framework for running examples in docstrings.
+
+In simplest use, end each module M to be tested with:
+
+def _test():
+ import doctest
+ doctest.testmod()
+
+if __name__ == "__main__":
+ _test()
+
+Then running the module as a script will cause the examples in the
+docstrings to get executed and verified:
+
+python M.py
+
+This won't display anything unless an example fails, in which case the
+failing example(s) and the cause(s) of the failure(s) are printed to stdout
+(why not stderr? because stderr is a lame hack <0.2 wink>), and the final
+line of output is "Test failed.".
+
+Run it with the -v switch instead:
+
+python M.py -v
+
+and a detailed report of all examples tried is printed to stdout, along
+with assorted summaries at the end.
+
+You can force verbose mode by passing "verbose=True" to testmod, or prohibit
+it by passing "verbose=False". In either of those cases, sys.argv is not
+examined by testmod.
+
+There are a variety of other ways to run doctests, including integration
+with the unittest framework, and support for running non-Python text
+files containing doctests. There are also many ways to override parts
+of doctest's default behaviors. See the Library Reference Manual for
+details.
+"""
+
+__docformat__ = 'reStructuredText en'
+
+__all__ = [
+ # 0, Option Flags
+ 'register_optionflag',
+ 'DONT_ACCEPT_TRUE_FOR_1',
+ 'DONT_ACCEPT_BLANKLINE',
+ 'NORMALIZE_WHITESPACE',
+ 'ELLIPSIS',
+ 'SKIP',
+ 'IGNORE_EXCEPTION_DETAIL',
+ 'COMPARISON_FLAGS',
+ 'REPORT_UDIFF',
+ 'REPORT_CDIFF',
+ 'REPORT_NDIFF',
+ 'REPORT_ONLY_FIRST_FAILURE',
+ 'REPORTING_FLAGS',
+ 'FAIL_FAST',
+ # 1. Utility Functions
+ # 2. Example & DocTest
+ 'Example',
+ 'DocTest',
+ # 3. Doctest Parser
+ 'DocTestParser',
+ # 4. Doctest Finder
+ 'DocTestFinder',
+ # 5. Doctest Runner
+ 'DocTestRunner',
+ 'OutputChecker',
+ 'DocTestFailure',
+ 'UnexpectedException',
+ 'DebugRunner',
+ # 6. Test Functions
+ 'testmod',
+ 'testfile',
+ 'run_docstring_examples',
+ # 7. Unittest Support
+ 'DocTestSuite',
+ 'DocFileSuite',
+ 'set_unittest_reportflags',
+ # 8. Debugging Support
+ 'script_from_examples',
+ 'testsource',
+ 'debug_src',
+ 'debug',
+]
+
+import __future__
+import difflib
+import inspect
+import linecache
+import os
+import pdb
+import re
+import sys
+import traceback
+import unittest
+from io import StringIO
+from collections import namedtuple
+
+TestResults = namedtuple('TestResults', 'failed attempted')
+
+# There are 4 basic classes:
+# - Example: a pair, plus an intra-docstring line number.
+# - DocTest: a collection of examples, parsed from a docstring, plus
+# info about where the docstring came from (name, filename, lineno).
+# - DocTestFinder: extracts DocTests from a given object's docstring and
+# its contained objects' docstrings.
+# - DocTestRunner: runs DocTest cases, and accumulates statistics.
+#
+# So the basic picture is:
+#
+# list of:
+# +------+ +---------+ +-------+
+# |object| --DocTestFinder-> | DocTest | --DocTestRunner-> |results|
+# +------+ +---------+ +-------+
+# | Example |
+# | ... |
+# | Example |
+# +---------+
+
+# Option constants.
+
+OPTIONFLAGS_BY_NAME = {}
+def register_optionflag(name):
+ # Create a new flag unless `name` is already known.
+ return OPTIONFLAGS_BY_NAME.setdefault(name, 1 << len(OPTIONFLAGS_BY_NAME))
+
+DONT_ACCEPT_TRUE_FOR_1 = register_optionflag('DONT_ACCEPT_TRUE_FOR_1')
+DONT_ACCEPT_BLANKLINE = register_optionflag('DONT_ACCEPT_BLANKLINE')
+NORMALIZE_WHITESPACE = register_optionflag('NORMALIZE_WHITESPACE')
+ELLIPSIS = register_optionflag('ELLIPSIS')
+SKIP = register_optionflag('SKIP')
+IGNORE_EXCEPTION_DETAIL = register_optionflag('IGNORE_EXCEPTION_DETAIL')
+
+COMPARISON_FLAGS = (DONT_ACCEPT_TRUE_FOR_1 |
+ DONT_ACCEPT_BLANKLINE |
+ NORMALIZE_WHITESPACE |
+ ELLIPSIS |
+ SKIP |
+ IGNORE_EXCEPTION_DETAIL)
+
+REPORT_UDIFF = register_optionflag('REPORT_UDIFF')
+REPORT_CDIFF = register_optionflag('REPORT_CDIFF')
+REPORT_NDIFF = register_optionflag('REPORT_NDIFF')
+REPORT_ONLY_FIRST_FAILURE = register_optionflag('REPORT_ONLY_FIRST_FAILURE')
+FAIL_FAST = register_optionflag('FAIL_FAST')
+
+REPORTING_FLAGS = (REPORT_UDIFF |
+ REPORT_CDIFF |
+ REPORT_NDIFF |
+ REPORT_ONLY_FIRST_FAILURE |
+ FAIL_FAST)
+
+# Special string markers for use in `want` strings:
+BLANKLINE_MARKER = ''
+ELLIPSIS_MARKER = '...'
+
+######################################################################
+## Table of Contents
+######################################################################
+# 1. Utility Functions
+# 2. Example & DocTest -- store test cases
+# 3. DocTest Parser -- extracts examples from strings
+# 4. DocTest Finder -- extracts test cases from objects
+# 5. DocTest Runner -- runs test cases
+# 6. Test Functions -- convenient wrappers for testing
+# 7. Unittest Support
+# 8. Debugging Support
+# 9. Example Usage
+
+######################################################################
+## 1. Utility Functions
+######################################################################
+
+def _extract_future_flags(globs):
+ """
+ Return the compiler-flags associated with the future features that
+ have been imported into the given namespace (globs).
+ """
+ flags = 0
+ for fname in __future__.all_feature_names:
+ feature = globs.get(fname, None)
+ if feature is getattr(__future__, fname):
+ flags |= feature.compiler_flag
+ return flags
+
+def _normalize_module(module, depth=2):
+ """
+ Return the module specified by `module`. In particular:
+ - If `module` is a module, then return module.
+ - If `module` is a string, then import and return the
+ module with that name.
+ - If `module` is None, then return the calling module.
+ The calling module is assumed to be the module of
+ the stack frame at the given depth in the call stack.
+ """
+ if inspect.ismodule(module):
+ return module
+ elif isinstance(module, str):
+ return __import__(module, globals(), locals(), ["*"])
+ elif module is None:
+ return sys.modules[sys._getframe(depth).f_globals['__name__']]
+ else:
+ raise TypeError("Expected a module, string, or None")
+
+def _newline_convert(data):
+ # We have two cases to cover and we need to make sure we do
+ # them in the right order
+ for newline in ('\r\n', '\r'):
+ data = data.replace(newline, '\n')
+ return data
+
+def _load_testfile(filename, package, module_relative, encoding):
+ if module_relative:
+ package = _normalize_module(package, 3)
+ filename = _module_relative_path(package, filename)
+ if getattr(package, '__loader__', None) is not None:
+ if hasattr(package.__loader__, 'get_data'):
+ file_contents = package.__loader__.get_data(filename)
+ file_contents = file_contents.decode(encoding)
+ # get_data() opens files as 'rb', so one must do the equivalent
+ # conversion as universal newlines would do.
+ return _newline_convert(file_contents), filename
+ with open(filename, encoding=encoding) as f:
+ return f.read(), filename
+
+def _indent(s, indent=4):
+ """
+ Add the given number of space characters to the beginning of
+ every non-blank line in `s`, and return the result.
+ """
+ # This regexp matches the start of non-blank lines:
+ return re.sub('(?m)^(?!$)', indent*' ', s)
+
+def _exception_traceback(exc_info):
+ """
+ Return a string containing a traceback message for the given
+ exc_info tuple (as returned by sys.exc_info()).
+ """
+ # Get a traceback message.
+ excout = StringIO()
+ exc_type, exc_val, exc_tb = exc_info
+ traceback.print_exception(exc_type, exc_val, exc_tb, file=excout)
+ return excout.getvalue()
+
+# Override some StringIO methods.
+class _SpoofOut(StringIO):
+ def getvalue(self):
+ result = StringIO.getvalue(self)
+ # If anything at all was written, make sure there's a trailing
+ # newline. There's no way for the expected output to indicate
+ # that a trailing newline is missing.
+ if result and not result.endswith("\n"):
+ result += "\n"
+ return result
+
+ def truncate(self, size=None):
+ self.seek(size)
+ StringIO.truncate(self)
+
+# Worst-case linear-time ellipsis matching.
+def _ellipsis_match(want, got):
+ """
+ Essentially the only subtle case:
+ >>> _ellipsis_match('aa...aa', 'aaa')
+ False
+ """
+ if ELLIPSIS_MARKER not in want:
+ return want == got
+
+ # Find "the real" strings.
+ ws = want.split(ELLIPSIS_MARKER)
+ assert len(ws) >= 2
+
+ # Deal with exact matches possibly needed at one or both ends.
+ startpos, endpos = 0, len(got)
+ w = ws[0]
+ if w: # starts with exact match
+ if got.startswith(w):
+ startpos = len(w)
+ del ws[0]
+ else:
+ return False
+ w = ws[-1]
+ if w: # ends with exact match
+ if got.endswith(w):
+ endpos -= len(w)
+ del ws[-1]
+ else:
+ return False
+
+ if startpos > endpos:
+ # Exact end matches required more characters than we have, as in
+ # _ellipsis_match('aa...aa', 'aaa')
+ return False
+
+ # For the rest, we only need to find the leftmost non-overlapping
+ # match for each piece. If there's no overall match that way alone,
+ # there's no overall match period.
+ for w in ws:
+ # w may be '' at times, if there are consecutive ellipses, or
+ # due to an ellipsis at the start or end of `want`. That's OK.
+ # Search for an empty string succeeds, and doesn't change startpos.
+ startpos = got.find(w, startpos, endpos)
+ if startpos < 0:
+ return False
+ startpos += len(w)
+
+ return True
+
+def _comment_line(line):
+ "Return a commented form of the given line"
+ line = line.rstrip()
+ if line:
+ return '# '+line
+ else:
+ return '#'
+
+def _strip_exception_details(msg):
+ # Support for IGNORE_EXCEPTION_DETAIL.
+ # Get rid of everything except the exception name; in particular, drop
+ # the possibly dotted module path (if any) and the exception message (if
+ # any). We assume that a colon is never part of a dotted name, or of an
+ # exception name.
+ # E.g., given
+ # "foo.bar.MyError: la di da"
+ # return "MyError"
+ # Or for "abc.def" or "abc.def:\n" return "def".
+
+ start, end = 0, len(msg)
+ # The exception name must appear on the first line.
+ i = msg.find("\n")
+ if i >= 0:
+ end = i
+ # retain up to the first colon (if any)
+ i = msg.find(':', 0, end)
+ if i >= 0:
+ end = i
+ # retain just the exception name
+ i = msg.rfind('.', 0, end)
+ if i >= 0:
+ start = i+1
+ return msg[start: end]
+
+class _OutputRedirectingPdb(pdb.Pdb):
+ """
+ A specialized version of the python debugger that redirects stdout
+ to a given stream when interacting with the user. Stdout is *not*
+ redirected when traced code is executed.
+ """
+ def __init__(self, out):
+ self.__out = out
+ self.__debugger_used = False
+ # do not play signal games in the pdb
+ pdb.Pdb.__init__(self, stdout=out, nosigint=True)
+ # still use input() to get user input
+ self.use_rawinput = 1
+
+ def set_trace(self, frame=None):
+ self.__debugger_used = True
+ if frame is None:
+ frame = sys._getframe().f_back
+ pdb.Pdb.set_trace(self, frame)
+
+ def set_continue(self):
+ # Calling set_continue unconditionally would break unit test
+ # coverage reporting, as Bdb.set_continue calls sys.settrace(None).
+ if self.__debugger_used:
+ pdb.Pdb.set_continue(self)
+
+ def trace_dispatch(self, *args):
+ # Redirect stdout to the given stream.
+ save_stdout = sys.stdout
+ sys.stdout = self.__out
+ # Call Pdb's trace dispatch method.
+ try:
+ return pdb.Pdb.trace_dispatch(self, *args)
+ finally:
+ sys.stdout = save_stdout
+
+# [XX] Normalize with respect to os.path.pardir?
+def _module_relative_path(module, test_path):
+ if not inspect.ismodule(module):
+ raise TypeError('Expected a module: %r' % module)
+ if test_path.startswith('/'):
+ raise ValueError('Module-relative files may not have absolute paths')
+
+ # Normalize the path. On Windows, replace "/" with "\".
+ test_path = os.path.join(*(test_path.split('/')))
+
+ # Find the base directory for the path.
+ if hasattr(module, '__file__'):
+ # A normal module/package
+ basedir = os.path.split(module.__file__)[0]
+ elif module.__name__ == '__main__':
+ # An interactive session.
+ if len(sys.argv)>0 and sys.argv[0] != '':
+ basedir = os.path.split(sys.argv[0])[0]
+ else:
+ basedir = os.curdir
+ else:
+ if hasattr(module, '__path__'):
+ for directory in module.__path__:
+ fullpath = os.path.join(directory, test_path)
+ if os.path.exists(fullpath):
+ return fullpath
+
+ # A module w/o __file__ (this includes builtins)
+ raise ValueError("Can't resolve paths relative to the module "
+ "%r (it has no __file__)"
+ % module.__name__)
+
+ # Combine the base directory and the test path.
+ return os.path.join(basedir, test_path)
+
+######################################################################
+## 2. Example & DocTest
+######################################################################
+## - An "example" is a pair, where "source" is a
+## fragment of source code, and "want" is the expected output for
+## "source." The Example class also includes information about
+## where the example was extracted from.
+##
+## - A "doctest" is a collection of examples, typically extracted from
+## a string (such as an object's docstring). The DocTest class also
+## includes information about where the string was extracted from.
+
+class Example:
+ """
+ A single doctest example, consisting of source code and expected
+ output. `Example` defines the following attributes:
+
+ - source: A single Python statement, always ending with a newline.
+ The constructor adds a newline if needed.
+
+ - want: The expected output from running the source code (either
+ from stdout, or a traceback in case of exception). `want` ends
+ with a newline unless it's empty, in which case it's an empty
+ string. The constructor adds a newline if needed.
+
+ - exc_msg: The exception message generated by the example, if
+ the example is expected to generate an exception; or `None` if
+ it is not expected to generate an exception. This exception
+ message is compared against the return value of
+ `traceback.format_exception_only()`. `exc_msg` ends with a
+ newline unless it's `None`. The constructor adds a newline
+ if needed.
+
+ - lineno: The line number within the DocTest string containing
+ this Example where the Example begins. This line number is
+ zero-based, with respect to the beginning of the DocTest.
+
+ - indent: The example's indentation in the DocTest string.
+ I.e., the number of space characters that precede the
+ example's first prompt.
+
+ - options: A dictionary mapping from option flags to True or
+ False, which is used to override default options for this
+ example. Any option flags not contained in this dictionary
+ are left at their default value (as specified by the
+ DocTestRunner's optionflags). By default, no options are set.
+ """
+ def __init__(self, source, want, exc_msg=None, lineno=0, indent=0,
+ options=None):
+ # Normalize inputs.
+ if not source.endswith('\n'):
+ source += '\n'
+ if want and not want.endswith('\n'):
+ want += '\n'
+ if exc_msg is not None and not exc_msg.endswith('\n'):
+ exc_msg += '\n'
+ # Store properties.
+ self.source = source
+ self.want = want
+ self.lineno = lineno
+ self.indent = indent
+ if options is None: options = {}
+ self.options = options
+ self.exc_msg = exc_msg
+
+ def __eq__(self, other):
+ if type(self) is not type(other):
+ return NotImplemented
+
+ return self.source == other.source and \
+ self.want == other.want and \
+ self.lineno == other.lineno and \
+ self.indent == other.indent and \
+ self.options == other.options and \
+ self.exc_msg == other.exc_msg
+
+ def __hash__(self):
+ return hash((self.source, self.want, self.lineno, self.indent,
+ self.exc_msg))
+
+class DocTest:
+ """
+ A collection of doctest examples that should be run in a single
+ namespace. Each `DocTest` defines the following attributes:
+
+ - examples: the list of examples.
+
+ - globs: The namespace (aka globals) that the examples should
+ be run in.
+
+ - name: A name identifying the DocTest (typically, the name of
+ the object whose docstring this DocTest was extracted from).
+
+ - filename: The name of the file that this DocTest was extracted
+ from, or `None` if the filename is unknown.
+
+ - lineno: The line number within filename where this DocTest
+ begins, or `None` if the line number is unavailable. This
+ line number is zero-based, with respect to the beginning of
+ the file.
+
+ - docstring: The string that the examples were extracted from,
+ or `None` if the string is unavailable.
+ """
+ def __init__(self, examples, globs, name, filename, lineno, docstring):
+ """
+ Create a new DocTest containing the given examples. The
+ DocTest's globals are initialized with a copy of `globs`.
+ """
+ assert not isinstance(examples, str), \
+ "DocTest no longer accepts str; use DocTestParser instead"
+ self.examples = examples
+ self.docstring = docstring
+ self.globs = globs.copy()
+ self.name = name
+ self.filename = filename
+ self.lineno = lineno
+
+ def __repr__(self):
+ if len(self.examples) == 0:
+ examples = 'no examples'
+ elif len(self.examples) == 1:
+ examples = '1 example'
+ else:
+ examples = '%d examples' % len(self.examples)
+ return ('<%s %s from %s:%s (%s)>' %
+ (self.__class__.__name__,
+ self.name, self.filename, self.lineno, examples))
+
+ def __eq__(self, other):
+ if type(self) is not type(other):
+ return NotImplemented
+
+ return self.examples == other.examples and \
+ self.docstring == other.docstring and \
+ self.globs == other.globs and \
+ self.name == other.name and \
+ self.filename == other.filename and \
+ self.lineno == other.lineno
+
+ def __hash__(self):
+ return hash((self.docstring, self.name, self.filename, self.lineno))
+
+ # This lets us sort tests by name:
+ def __lt__(self, other):
+ if not isinstance(other, DocTest):
+ return NotImplemented
+ return ((self.name, self.filename, self.lineno, id(self))
+ <
+ (other.name, other.filename, other.lineno, id(other)))
+
+######################################################################
+## 3. DocTestParser
+######################################################################
+
+class DocTestParser:
+ """
+ A class used to parse strings containing doctest examples.
+ """
+ # This regular expression is used to find doctest examples in a
+ # string. It defines three groups: `source` is the source code
+ # (including leading indentation and prompts); `indent` is the
+ # indentation of the first (PS1) line of the source code; and
+ # `want` is the expected output (including leading indentation).
+ _EXAMPLE_RE = re.compile(r'''
+ # Source consists of a PS1 line followed by zero or more PS2 lines.
+ (?P
+ (?:^(?P [ ]*) >>> .*) # PS1 line
+ (?:\n [ ]* \.\.\. .*)*) # PS2 lines
+ \n?
+ # Want consists of any non-blank lines that do not start with PS1.
+ (?P (?:(?![ ]*$) # Not a blank line
+ (?![ ]*>>>) # Not a line starting with PS1
+ .+$\n? # But any other line
+ )*)
+ ''', re.MULTILINE | re.VERBOSE)
+
+ # A regular expression for handling `want` strings that contain
+ # expected exceptions. It divides `want` into three pieces:
+ # - the traceback header line (`hdr`)
+ # - the traceback stack (`stack`)
+ # - the exception message (`msg`), as generated by
+ # traceback.format_exception_only()
+ # `msg` may have multiple lines. We assume/require that the
+ # exception message is the first non-indented line starting with a word
+ # character following the traceback header line.
+ _EXCEPTION_RE = re.compile(r"""
+ # Grab the traceback header. Different versions of Python have
+ # said different things on the first traceback line.
+ ^(?P Traceback\ \(
+ (?: most\ recent\ call\ last
+ | innermost\ last
+ ) \) :
+ )
+ \s* $ # toss trailing whitespace on the header.
+ (?P .*?) # don't blink: absorb stuff until...
+ ^ (?P \w+ .*) # a line *starts* with alphanum.
+ """, re.VERBOSE | re.MULTILINE | re.DOTALL)
+
+ # A callable returning a true value iff its argument is a blank line
+ # or contains a single comment.
+ _IS_BLANK_OR_COMMENT = re.compile(r'^[ ]*(#.*)?$').match
+
+ def parse(self, string, name=''):
+ """
+ Divide the given string into examples and intervening text,
+ and return them as a list of alternating Examples and strings.
+ Line numbers for the Examples are 0-based. The optional
+ argument `name` is a name identifying this string, and is only
+ used for error messages.
+ """
+ string = string.expandtabs()
+ # If all lines begin with the same indentation, then strip it.
+ min_indent = self._min_indent(string)
+ if min_indent > 0:
+ string = '\n'.join([l[min_indent:] for l in string.split('\n')])
+
+ output = []
+ charno, lineno = 0, 0
+ # Find all doctest examples in the string:
+ for m in self._EXAMPLE_RE.finditer(string):
+ # Add the pre-example text to `output`.
+ output.append(string[charno:m.start()])
+ # Update lineno (lines before this example)
+ lineno += string.count('\n', charno, m.start())
+ # Extract info from the regexp match.
+ (source, options, want, exc_msg) = \
+ self._parse_example(m, name, lineno)
+ # Create an Example, and add it to the list.
+ if not self._IS_BLANK_OR_COMMENT(source):
+ output.append( Example(source, want, exc_msg,
+ lineno=lineno,
+ indent=min_indent+len(m.group('indent')),
+ options=options) )
+ # Update lineno (lines inside this example)
+ lineno += string.count('\n', m.start(), m.end())
+ # Update charno.
+ charno = m.end()
+ # Add any remaining post-example text to `output`.
+ output.append(string[charno:])
+ return output
+
+ def get_doctest(self, string, globs, name, filename, lineno):
+ """
+ Extract all doctest examples from the given string, and
+ collect them into a `DocTest` object.
+
+ `globs`, `name`, `filename`, and `lineno` are attributes for
+ the new `DocTest` object. See the documentation for `DocTest`
+ for more information.
+ """
+ return DocTest(self.get_examples(string, name), globs,
+ name, filename, lineno, string)
+
+ def get_examples(self, string, name=''):
+ """
+ Extract all doctest examples from the given string, and return
+ them as a list of `Example` objects. Line numbers are
+ 0-based, because it's most common in doctests that nothing
+ interesting appears on the same line as opening triple-quote,
+ and so the first interesting line is called \"line 1\" then.
+
+ The optional argument `name` is a name identifying this
+ string, and is only used for error messages.
+ """
+ return [x for x in self.parse(string, name)
+ if isinstance(x, Example)]
+
+ def _parse_example(self, m, name, lineno):
+ """
+ Given a regular expression match from `_EXAMPLE_RE` (`m`),
+ return a pair `(source, want)`, where `source` is the matched
+ example's source code (with prompts and indentation stripped);
+ and `want` is the example's expected output (with indentation
+ stripped).
+
+ `name` is the string's name, and `lineno` is the line number
+ where the example starts; both are used for error messages.
+ """
+ # Get the example's indentation level.
+ indent = len(m.group('indent'))
+
+ # Divide source into lines; check that they're properly
+ # indented; and then strip their indentation & prompts.
+ source_lines = m.group('source').split('\n')
+ self._check_prompt_blank(source_lines, indent, name, lineno)
+ self._check_prefix(source_lines[1:], ' '*indent + '.', name, lineno)
+ source = '\n'.join([sl[indent+4:] for sl in source_lines])
+
+ # Divide want into lines; check that it's properly indented; and
+ # then strip the indentation. Spaces before the last newline should
+ # be preserved, so plain rstrip() isn't good enough.
+ want = m.group('want')
+ want_lines = want.split('\n')
+ if len(want_lines) > 1 and re.match(r' *$', want_lines[-1]):
+ del want_lines[-1] # forget final newline & spaces after it
+ self._check_prefix(want_lines, ' '*indent, name,
+ lineno + len(source_lines))
+ want = '\n'.join([wl[indent:] for wl in want_lines])
+
+ # If `want` contains a traceback message, then extract it.
+ m = self._EXCEPTION_RE.match(want)
+ if m:
+ exc_msg = m.group('msg')
+ else:
+ exc_msg = None
+
+ # Extract options from the source.
+ options = self._find_options(source, name, lineno)
+
+ return source, options, want, exc_msg
+
+ # This regular expression looks for option directives in the
+ # source code of an example. Option directives are comments
+ # starting with "doctest:". Warning: this may give false
+ # positives for string-literals that contain the string
+ # "#doctest:". Eliminating these false positives would require
+ # actually parsing the string; but we limit them by ignoring any
+ # line containing "#doctest:" that is *followed* by a quote mark.
+ _OPTION_DIRECTIVE_RE = re.compile(r'#\s*doctest:\s*([^\n\'"]*)$',
+ re.MULTILINE)
+
+ def _find_options(self, source, name, lineno):
+ """
+ Return a dictionary containing option overrides extracted from
+ option directives in the given source string.
+
+ `name` is the string's name, and `lineno` is the line number
+ where the example starts; both are used for error messages.
+ """
+ options = {}
+ # (note: with the current regexp, this will match at most once:)
+ for m in self._OPTION_DIRECTIVE_RE.finditer(source):
+ option_strings = m.group(1).replace(',', ' ').split()
+ for option in option_strings:
+ if (option[0] not in '+-' or
+ option[1:] not in OPTIONFLAGS_BY_NAME):
+ raise ValueError('line %r of the doctest for %s '
+ 'has an invalid option: %r' %
+ (lineno+1, name, option))
+ flag = OPTIONFLAGS_BY_NAME[option[1:]]
+ options[flag] = (option[0] == '+')
+ if options and self._IS_BLANK_OR_COMMENT(source):
+ raise ValueError('line %r of the doctest for %s has an option '
+ 'directive on a line with no example: %r' %
+ (lineno, name, source))
+ return options
+
+ # This regular expression finds the indentation of every non-blank
+ # line in a string.
+ _INDENT_RE = re.compile(r'^([ ]*)(?=\S)', re.MULTILINE)
+
+ def _min_indent(self, s):
+ "Return the minimum indentation of any non-blank line in `s`"
+ indents = [len(indent) for indent in self._INDENT_RE.findall(s)]
+ if len(indents) > 0:
+ return min(indents)
+ else:
+ return 0
+
+ def _check_prompt_blank(self, lines, indent, name, lineno):
+ """
+ Given the lines of a source string (including prompts and
+ leading indentation), check to make sure that every prompt is
+ followed by a space character. If any line is not followed by
+ a space character, then raise ValueError.
+ """
+ for i, line in enumerate(lines):
+ if len(line) >= indent+4 and line[indent+3] != ' ':
+ raise ValueError('line %r of the docstring for %s '
+ 'lacks blank after %s: %r' %
+ (lineno+i+1, name,
+ line[indent:indent+3], line))
+
+ def _check_prefix(self, lines, prefix, name, lineno):
+ """
+ Check that every line in the given list starts with the given
+ prefix; if any line does not, then raise a ValueError.
+ """
+ for i, line in enumerate(lines):
+ if line and not line.startswith(prefix):
+ raise ValueError('line %r of the docstring for %s has '
+ 'inconsistent leading whitespace: %r' %
+ (lineno+i+1, name, line))
+
+
+######################################################################
+## 4. DocTest Finder
+######################################################################
+
+class DocTestFinder:
+ """
+ A class used to extract the DocTests that are relevant to a given
+ object, from its docstring and the docstrings of its contained
+ objects. Doctests can currently be extracted from the following
+ object types: modules, functions, classes, methods, staticmethods,
+ classmethods, and properties.
+ """
+
+ def __init__(self, verbose=False, parser=DocTestParser(),
+ recurse=True, exclude_empty=True):
+ """
+ Create a new doctest finder.
+
+ The optional argument `parser` specifies a class or
+ function that should be used to create new DocTest objects (or
+ objects that implement the same interface as DocTest). The
+ signature for this factory function should match the signature
+ of the DocTest constructor.
+
+ If the optional argument `recurse` is false, then `find` will
+ only examine the given object, and not any contained objects.
+
+ If the optional argument `exclude_empty` is false, then `find`
+ will include tests for objects with empty docstrings.
+ """
+ self._parser = parser
+ self._verbose = verbose
+ self._recurse = recurse
+ self._exclude_empty = exclude_empty
+
+ def find(self, obj, name=None, module=None, globs=None, extraglobs=None):
+ """
+ Return a list of the DocTests that are defined by the given
+ object's docstring, or by any of its contained objects'
+ docstrings.
+
+ The optional parameter `module` is the module that contains
+ the given object. If the module is not specified or is None, then
+ the test finder will attempt to automatically determine the
+ correct module. The object's module is used:
+
+ - As a default namespace, if `globs` is not specified.
+ - To prevent the DocTestFinder from extracting DocTests
+ from objects that are imported from other modules.
+ - To find the name of the file containing the object.
+ - To help find the line number of the object within its
+ file.
+
+ Contained objects whose module does not match `module` are ignored.
+
+ If `module` is False, no attempt to find the module will be made.
+ This is obscure, of use mostly in tests: if `module` is False, or
+ is None but cannot be found automatically, then all objects are
+ considered to belong to the (non-existent) module, so all contained
+ objects will (recursively) be searched for doctests.
+
+ The globals for each DocTest is formed by combining `globs`
+ and `extraglobs` (bindings in `extraglobs` override bindings
+ in `globs`). A new copy of the globals dictionary is created
+ for each DocTest. If `globs` is not specified, then it
+ defaults to the module's `__dict__`, if specified, or {}
+ otherwise. If `extraglobs` is not specified, then it defaults
+ to {}.
+
+ """
+ # If name was not specified, then extract it from the object.
+ if name is None:
+ name = getattr(obj, '__name__', None)
+ if name is None:
+ raise ValueError("DocTestFinder.find: name must be given "
+ "when obj.__name__ doesn't exist: %r" %
+ (type(obj),))
+
+ # Find the module that contains the given object (if obj is
+ # a module, then module=obj.). Note: this may fail, in which
+ # case module will be None.
+ if module is False:
+ module = None
+ elif module is None:
+ module = inspect.getmodule(obj)
+
+ # Read the module's source code. This is used by
+ # DocTestFinder._find_lineno to find the line number for a
+ # given object's docstring.
+ try:
+ file = inspect.getsourcefile(obj)
+ except TypeError:
+ source_lines = None
+ else:
+ if not file:
+ # Check to see if it's one of our special internal "files"
+ # (see __patched_linecache_getlines).
+ file = inspect.getfile(obj)
+ if not file[0]+file[-2:] == '<]>': file = None
+ if file is None:
+ source_lines = None
+ else:
+ if module is not None:
+ # Supply the module globals in case the module was
+ # originally loaded via a PEP 302 loader and
+ # file is not a valid filesystem path
+ source_lines = linecache.getlines(file, module.__dict__)
+ else:
+ # No access to a loader, so assume it's a normal
+ # filesystem path
+ source_lines = linecache.getlines(file)
+ if not source_lines:
+ source_lines = None
+
+ # Initialize globals, and merge in extraglobs.
+ if globs is None:
+ if module is None:
+ globs = {}
+ else:
+ globs = module.__dict__.copy()
+ else:
+ globs = globs.copy()
+ if extraglobs is not None:
+ globs.update(extraglobs)
+ if '__name__' not in globs:
+ globs['__name__'] = '__main__' # provide a default module name
+
+ # Recursively explore `obj`, extracting DocTests.
+ tests = []
+ self._find(tests, obj, name, module, source_lines, globs, {})
+ # Sort the tests by alpha order of names, for consistency in
+ # verbose-mode output. This was a feature of doctest in Pythons
+ # <= 2.3 that got lost by accident in 2.4. It was repaired in
+ # 2.4.4 and 2.5.
+ tests.sort()
+ return tests
+
+ def _from_module(self, module, object):
+ """
+ Return true if the given object is defined in the given
+ module.
+ """
+ if module is None:
+ return True
+ elif inspect.getmodule(object) is not None:
+ return module is inspect.getmodule(object)
+ elif inspect.isfunction(object):
+ return module.__dict__ is object.__globals__
+ elif inspect.ismethoddescriptor(object):
+ if hasattr(object, '__objclass__'):
+ obj_mod = object.__objclass__.__module__
+ elif hasattr(object, '__module__'):
+ obj_mod = object.__module__
+ else:
+ return True # [XX] no easy way to tell otherwise
+ return module.__name__ == obj_mod
+ elif inspect.isclass(object):
+ return module.__name__ == object.__module__
+ elif hasattr(object, '__module__'):
+ return module.__name__ == object.__module__
+ elif isinstance(object, property):
+ return True # [XX] no way not be sure.
+ else:
+ raise ValueError("object must be a class or function")
+
+ def _find(self, tests, obj, name, module, source_lines, globs, seen):
+ """
+ Find tests for the given object and any contained objects, and
+ add them to `tests`.
+ """
+ if self._verbose:
+ print('Finding tests in %s' % name)
+
+ # If we've already processed this object, then ignore it.
+ if id(obj) in seen:
+ return
+ seen[id(obj)] = 1
+
+ # Find a test for this object, and add it to the list of tests.
+ test = self._get_test(obj, name, module, globs, source_lines)
+ if test is not None:
+ tests.append(test)
+
+ # Look for tests in a module's contained objects.
+ if inspect.ismodule(obj) and self._recurse:
+ for valname, val in obj.__dict__.items():
+ valname = '%s.%s' % (name, valname)
+ # Recurse to functions & classes.
+ if ((inspect.isroutine(inspect.unwrap(val))
+ or inspect.isclass(val)) and
+ self._from_module(module, val)):
+ self._find(tests, val, valname, module, source_lines,
+ globs, seen)
+
+ # Look for tests in a module's __test__ dictionary.
+ if inspect.ismodule(obj) and self._recurse:
+ for valname, val in getattr(obj, '__test__', {}).items():
+ if not isinstance(valname, str):
+ raise ValueError("DocTestFinder.find: __test__ keys "
+ "must be strings: %r" %
+ (type(valname),))
+ if not (inspect.isroutine(val) or inspect.isclass(val) or
+ inspect.ismodule(val) or isinstance(val, str)):
+ raise ValueError("DocTestFinder.find: __test__ values "
+ "must be strings, functions, methods, "
+ "classes, or modules: %r" %
+ (type(val),))
+ valname = '%s.__test__.%s' % (name, valname)
+ self._find(tests, val, valname, module, source_lines,
+ globs, seen)
+
+ # Look for tests in a class's contained objects.
+ if inspect.isclass(obj) and self._recurse:
+ for valname, val in obj.__dict__.items():
+ # Special handling for staticmethod/classmethod.
+ if isinstance(val, staticmethod):
+ val = getattr(obj, valname)
+ if isinstance(val, classmethod):
+ val = getattr(obj, valname).__func__
+
+ # Recurse to methods, properties, and nested classes.
+ if ((inspect.isroutine(val) or inspect.isclass(val) or
+ isinstance(val, property)) and
+ self._from_module(module, val)):
+ valname = '%s.%s' % (name, valname)
+ self._find(tests, val, valname, module, source_lines,
+ globs, seen)
+
+ def _get_test(self, obj, name, module, globs, source_lines):
+ """
+ Return a DocTest for the given object, if it defines a docstring;
+ otherwise, return None.
+ """
+ # Extract the object's docstring. If it doesn't have one,
+ # then return None (no test for this object).
+ if isinstance(obj, str):
+ docstring = obj
+ else:
+ try:
+ if obj.__doc__ is None:
+ docstring = ''
+ else:
+ docstring = obj.__doc__
+ if not isinstance(docstring, str):
+ docstring = str(docstring)
+ except (TypeError, AttributeError):
+ docstring = ''
+
+ # Find the docstring's location in the file.
+ lineno = self._find_lineno(obj, source_lines)
+
+ # Don't bother if the docstring is empty.
+ if self._exclude_empty and not docstring:
+ return None
+
+ # Return a DocTest for this object.
+ if module is None:
+ filename = None
+ else:
+ # __file__ can be None for namespace packages.
+ filename = getattr(module, '__file__', None) or module.__name__
+ if filename[-4:] == ".pyc":
+ filename = filename[:-1]
+ return self._parser.get_doctest(docstring, globs, name,
+ filename, lineno)
+
+ def _find_lineno(self, obj, source_lines):
+ """
+ Return a line number of the given object's docstring. Note:
+ this method assumes that the object has a docstring.
+ """
+ lineno = None
+
+ # Find the line number for modules.
+ if inspect.ismodule(obj):
+ lineno = 0
+
+ # Find the line number for classes.
+ # Note: this could be fooled if a class is defined multiple
+ # times in a single file.
+ if inspect.isclass(obj):
+ if source_lines is None:
+ return None
+ pat = re.compile(r'^\s*class\s*%s\b' %
+ getattr(obj, '__name__', '-'))
+ for i, line in enumerate(source_lines):
+ if pat.match(line):
+ lineno = i
+ break
+
+ # Find the line number for functions & methods.
+ if inspect.ismethod(obj): obj = obj.__func__
+ if inspect.isfunction(obj): obj = obj.__code__
+ if inspect.istraceback(obj): obj = obj.tb_frame
+ if inspect.isframe(obj): obj = obj.f_code
+ if inspect.iscode(obj):
+ lineno = getattr(obj, 'co_firstlineno', None)-1
+
+ # Find the line number where the docstring starts. Assume
+ # that it's the first line that begins with a quote mark.
+ # Note: this could be fooled by a multiline function
+ # signature, where a continuation line begins with a quote
+ # mark.
+ if lineno is not None:
+ if source_lines is None:
+ return lineno+1
+ pat = re.compile(r'(^|.*:)\s*\w*("|\')')
+ for lineno in range(lineno, len(source_lines)):
+ if pat.match(source_lines[lineno]):
+ return lineno
+
+ # We couldn't find the line number.
+ return None
+
+######################################################################
+## 5. DocTest Runner
+######################################################################
+
+class DocTestRunner:
+ """
+ A class used to run DocTest test cases, and accumulate statistics.
+ The `run` method is used to process a single DocTest case. It
+ returns a tuple `(f, t)`, where `t` is the number of test cases
+ tried, and `f` is the number of test cases that failed.
+
+ >>> tests = DocTestFinder().find(_TestClass)
+ >>> runner = DocTestRunner(verbose=False)
+ >>> tests.sort(key = lambda test: test.name)
+ >>> for test in tests:
+ ... print(test.name, '->', runner.run(test))
+ _TestClass -> TestResults(failed=0, attempted=2)
+ _TestClass.__init__ -> TestResults(failed=0, attempted=2)
+ _TestClass.get -> TestResults(failed=0, attempted=2)
+ _TestClass.square -> TestResults(failed=0, attempted=1)
+
+ The `summarize` method prints a summary of all the test cases that
+ have been run by the runner, and returns an aggregated `(f, t)`
+ tuple:
+
+ >>> runner.summarize(verbose=1)
+ 4 items passed all tests:
+ 2 tests in _TestClass
+ 2 tests in _TestClass.__init__
+ 2 tests in _TestClass.get
+ 1 tests in _TestClass.square
+ 7 tests in 4 items.
+ 7 passed and 0 failed.
+ Test passed.
+ TestResults(failed=0, attempted=7)
+
+ The aggregated number of tried examples and failed examples is
+ also available via the `tries` and `failures` attributes:
+
+ >>> runner.tries
+ 7
+ >>> runner.failures
+ 0
+
+ The comparison between expected outputs and actual outputs is done
+ by an `OutputChecker`. This comparison may be customized with a
+ number of option flags; see the documentation for `testmod` for
+ more information. If the option flags are insufficient, then the
+ comparison may also be customized by passing a subclass of
+ `OutputChecker` to the constructor.
+
+ The test runner's display output can be controlled in two ways.
+ First, an output function (`out) can be passed to
+ `TestRunner.run`; this function will be called with strings that
+ should be displayed. It defaults to `sys.stdout.write`. If
+ capturing the output is not sufficient, then the display output
+ can be also customized by subclassing DocTestRunner, and
+ overriding the methods `report_start`, `report_success`,
+ `report_unexpected_exception`, and `report_failure`.
+ """
+ # This divider string is used to separate failure messages, and to
+ # separate sections of the summary.
+ DIVIDER = "*" * 70
+
+ def __init__(self, checker=None, verbose=None, optionflags=0):
+ """
+ Create a new test runner.
+
+ Optional keyword arg `checker` is the `OutputChecker` that
+ should be used to compare the expected outputs and actual
+ outputs of doctest examples.
+
+ Optional keyword arg 'verbose' prints lots of stuff if true,
+ only failures if false; by default, it's true iff '-v' is in
+ sys.argv.
+
+ Optional argument `optionflags` can be used to control how the
+ test runner compares expected output to actual output, and how
+ it displays failures. See the documentation for `testmod` for
+ more information.
+ """
+ self._checker = checker or OutputChecker()
+ if verbose is None:
+ verbose = '-v' in sys.argv
+ self._verbose = verbose
+ self.optionflags = optionflags
+ self.original_optionflags = optionflags
+
+ # Keep track of the examples we've run.
+ self.tries = 0
+ self.failures = 0
+ self._name2ft = {}
+
+ # Create a fake output target for capturing doctest output.
+ self._fakeout = _SpoofOut()
+
+ #/////////////////////////////////////////////////////////////////
+ # Reporting methods
+ #/////////////////////////////////////////////////////////////////
+
+ def report_start(self, out, test, example):
+ """
+ Report that the test runner is about to process the given
+ example. (Only displays a message if verbose=True)
+ """
+ if self._verbose:
+ if example.want:
+ out('Trying:\n' + _indent(example.source) +
+ 'Expecting:\n' + _indent(example.want))
+ else:
+ out('Trying:\n' + _indent(example.source) +
+ 'Expecting nothing\n')
+
+ def report_success(self, out, test, example, got):
+ """
+ Report that the given example ran successfully. (Only
+ displays a message if verbose=True)
+ """
+ if self._verbose:
+ out("ok\n")
+
+ def report_failure(self, out, test, example, got):
+ """
+ Report that the given example failed.
+ """
+ out(self._failure_header(test, example) +
+ self._checker.output_difference(example, got, self.optionflags))
+
+ def report_unexpected_exception(self, out, test, example, exc_info):
+ """
+ Report that the given example raised an unexpected exception.
+ """
+ out(self._failure_header(test, example) +
+ 'Exception raised:\n' + _indent(_exception_traceback(exc_info)))
+
+ def _failure_header(self, test, example):
+ out = [self.DIVIDER]
+ if test.filename:
+ if test.lineno is not None and example.lineno is not None:
+ lineno = test.lineno + example.lineno + 1
+ else:
+ lineno = '?'
+ out.append('File "%s", line %s, in %s' %
+ (test.filename, lineno, test.name))
+ else:
+ out.append('Line %s, in %s' % (example.lineno+1, test.name))
+ out.append('Failed example:')
+ source = example.source
+ out.append(_indent(source))
+ return '\n'.join(out)
+
+ #/////////////////////////////////////////////////////////////////
+ # DocTest Running
+ #/////////////////////////////////////////////////////////////////
+
+ def __run(self, test, compileflags, out):
+ """
+ Run the examples in `test`. Write the outcome of each example
+ with one of the `DocTestRunner.report_*` methods, using the
+ writer function `out`. `compileflags` is the set of compiler
+ flags that should be used to execute examples. Return a tuple
+ `(f, t)`, where `t` is the number of examples tried, and `f`
+ is the number of examples that failed. The examples are run
+ in the namespace `test.globs`.
+ """
+ # Keep track of the number of failures and tries.
+ failures = tries = 0
+
+ # Save the option flags (since option directives can be used
+ # to modify them).
+ original_optionflags = self.optionflags
+
+ SUCCESS, FAILURE, BOOM = range(3) # `outcome` state
+
+ check = self._checker.check_output
+
+ # Process each example.
+ for examplenum, example in enumerate(test.examples):
+
+ # If REPORT_ONLY_FIRST_FAILURE is set, then suppress
+ # reporting after the first failure.
+ quiet = (self.optionflags & REPORT_ONLY_FIRST_FAILURE and
+ failures > 0)
+
+ # Merge in the example's options.
+ self.optionflags = original_optionflags
+ if example.options:
+ for (optionflag, val) in example.options.items():
+ if val:
+ self.optionflags |= optionflag
+ else:
+ self.optionflags &= ~optionflag
+
+ # If 'SKIP' is set, then skip this example.
+ if self.optionflags & SKIP:
+ continue
+
+ # Record that we started this example.
+ tries += 1
+ if not quiet:
+ self.report_start(out, test, example)
+
+ # Use a special filename for compile(), so we can retrieve
+ # the source code during interactive debugging (see
+ # __patched_linecache_getlines).
+ filename = '' % (test.name, examplenum)
+
+ # Run the example in the given context (globs), and record
+ # any exception that gets raised. (But don't intercept
+ # keyboard interrupts.)
+ try:
+ # Don't blink! This is where the user's code gets run.
+ exec(compile(example.source, filename, "single",
+ compileflags, 1), test.globs)
+ self.debugger.set_continue() # ==== Example Finished ====
+ exception = None
+ except KeyboardInterrupt:
+ raise
+ except:
+ exception = sys.exc_info()
+ self.debugger.set_continue() # ==== Example Finished ====
+
+ got = self._fakeout.getvalue() # the actual output
+ self._fakeout.truncate(0)
+ outcome = FAILURE # guilty until proved innocent or insane
+
+ # If the example executed without raising any exceptions,
+ # verify its output.
+ if exception is None:
+ if check(example.want, got, self.optionflags):
+ outcome = SUCCESS
+
+ # The example raised an exception: check if it was expected.
+ else:
+ exc_msg = traceback.format_exception_only(*exception[:2])[-1]
+ if not quiet:
+ got += _exception_traceback(exception)
+
+ # If `example.exc_msg` is None, then we weren't expecting
+ # an exception.
+ if example.exc_msg is None:
+ outcome = BOOM
+
+ # We expected an exception: see whether it matches.
+ elif check(example.exc_msg, exc_msg, self.optionflags):
+ outcome = SUCCESS
+
+ # Another chance if they didn't care about the detail.
+ elif self.optionflags & IGNORE_EXCEPTION_DETAIL:
+ if check(_strip_exception_details(example.exc_msg),
+ _strip_exception_details(exc_msg),
+ self.optionflags):
+ outcome = SUCCESS
+
+ # Report the outcome.
+ if outcome is SUCCESS:
+ if not quiet:
+ self.report_success(out, test, example, got)
+ elif outcome is FAILURE:
+ if not quiet:
+ self.report_failure(out, test, example, got)
+ failures += 1
+ elif outcome is BOOM:
+ if not quiet:
+ self.report_unexpected_exception(out, test, example,
+ exception)
+ failures += 1
+ else:
+ assert False, ("unknown outcome", outcome)
+
+ if failures and self.optionflags & FAIL_FAST:
+ break
+
+ # Restore the option flags (in case they were modified)
+ self.optionflags = original_optionflags
+
+ # Record and return the number of failures and tries.
+ self.__record_outcome(test, failures, tries)
+ return TestResults(failures, tries)
+
+ def __record_outcome(self, test, f, t):
+ """
+ Record the fact that the given DocTest (`test`) generated `f`
+ failures out of `t` tried examples.
+ """
+ f2, t2 = self._name2ft.get(test.name, (0,0))
+ self._name2ft[test.name] = (f+f2, t+t2)
+ self.failures += f
+ self.tries += t
+
+ __LINECACHE_FILENAME_RE = re.compile(r'.+)'
+ r'\[(?P\d+)\]>$')
+ def __patched_linecache_getlines(self, filename, module_globals=None):
+ m = self.__LINECACHE_FILENAME_RE.match(filename)
+ if m and m.group('name') == self.test.name:
+ example = self.test.examples[int(m.group('examplenum'))]
+ return example.source.splitlines(keepends=True)
+ else:
+ return self.save_linecache_getlines(filename, module_globals)
+
+ def run(self, test, compileflags=None, out=None, clear_globs=True):
+ """
+ Run the examples in `test`, and display the results using the
+ writer function `out`.
+
+ The examples are run in the namespace `test.globs`. If
+ `clear_globs` is true (the default), then this namespace will
+ be cleared after the test runs, to help with garbage
+ collection. If you would like to examine the namespace after
+ the test completes, then use `clear_globs=False`.
+
+ `compileflags` gives the set of flags that should be used by
+ the Python compiler when running the examples. If not
+ specified, then it will default to the set of future-import
+ flags that apply to `globs`.
+
+ The output of each example is checked using
+ `DocTestRunner.check_output`, and the results are formatted by
+ the `DocTestRunner.report_*` methods.
+ """
+ self.test = test
+
+ if compileflags is None:
+ compileflags = _extract_future_flags(test.globs)
+
+ save_stdout = sys.stdout
+ if out is None:
+ encoding = save_stdout.encoding
+ if encoding is None or encoding.lower() == 'utf-8':
+ out = save_stdout.write
+ else:
+ # Use backslashreplace error handling on write
+ def out(s):
+ s = str(s.encode(encoding, 'backslashreplace'), encoding)
+ save_stdout.write(s)
+ sys.stdout = self._fakeout
+
+ # Patch pdb.set_trace to restore sys.stdout during interactive
+ # debugging (so it's not still redirected to self._fakeout).
+ # Note that the interactive output will go to *our*
+ # save_stdout, even if that's not the real sys.stdout; this
+ # allows us to write test cases for the set_trace behavior.
+ save_trace = sys.gettrace()
+ save_set_trace = pdb.set_trace
+ self.debugger = _OutputRedirectingPdb(save_stdout)
+ self.debugger.reset()
+ pdb.set_trace = self.debugger.set_trace
+
+ # Patch linecache.getlines, so we can see the example's source
+ # when we're inside the debugger.
+ self.save_linecache_getlines = linecache.getlines
+ linecache.getlines = self.__patched_linecache_getlines
+
+ # Make sure sys.displayhook just prints the value to stdout
+ save_displayhook = sys.displayhook
+ sys.displayhook = sys.__displayhook__
+
+ try:
+ return self.__run(test, compileflags, out)
+ finally:
+ sys.stdout = save_stdout
+ pdb.set_trace = save_set_trace
+ sys.settrace(save_trace)
+ linecache.getlines = self.save_linecache_getlines
+ sys.displayhook = save_displayhook
+ if clear_globs:
+ test.globs.clear()
+ import builtins
+ builtins._ = None
+
+ #/////////////////////////////////////////////////////////////////
+ # Summarization
+ #/////////////////////////////////////////////////////////////////
+ def summarize(self, verbose=None):
+ """
+ Print a summary of all the test cases that have been run by
+ this DocTestRunner, and return a tuple `(f, t)`, where `f` is
+ the total number of failed examples, and `t` is the total
+ number of tried examples.
+
+ The optional `verbose` argument controls how detailed the
+ summary is. If the verbosity is not specified, then the
+ DocTestRunner's verbosity is used.
+ """
+ if verbose is None:
+ verbose = self._verbose
+ notests = []
+ passed = []
+ failed = []
+ totalt = totalf = 0
+ for x in self._name2ft.items():
+ name, (f, t) = x
+ assert f <= t
+ totalt += t
+ totalf += f
+ if t == 0:
+ notests.append(name)
+ elif f == 0:
+ passed.append( (name, t) )
+ else:
+ failed.append(x)
+ if verbose:
+ if notests:
+ print(len(notests), "items had no tests:")
+ notests.sort()
+ for thing in notests:
+ print(" ", thing)
+ if passed:
+ print(len(passed), "items passed all tests:")
+ passed.sort()
+ for thing, count in passed:
+ print(" %3d tests in %s" % (count, thing))
+ if failed:
+ print(self.DIVIDER)
+ print(len(failed), "items had failures:")
+ failed.sort()
+ for thing, (f, t) in failed:
+ print(" %3d of %3d in %s" % (f, t, thing))
+ if verbose:
+ print(totalt, "tests in", len(self._name2ft), "items.")
+ print(totalt - totalf, "passed and", totalf, "failed.")
+ if totalf:
+ print("***Test Failed***", totalf, "failures.")
+ elif verbose:
+ print("Test passed.")
+ return TestResults(totalf, totalt)
+
+ #/////////////////////////////////////////////////////////////////
+ # Backward compatibility cruft to maintain doctest.master.
+ #/////////////////////////////////////////////////////////////////
+ def merge(self, other):
+ d = self._name2ft
+ for name, (f, t) in other._name2ft.items():
+ if name in d:
+ # Don't print here by default, since doing
+ # so breaks some of the buildbots
+ #print("*** DocTestRunner.merge: '" + name + "' in both" \
+ # " testers; summing outcomes.")
+ f2, t2 = d[name]
+ f = f + f2
+ t = t + t2
+ d[name] = f, t
+
+class OutputChecker:
+ """
+ A class used to check the whether the actual output from a doctest
+ example matches the expected output. `OutputChecker` defines two
+ methods: `check_output`, which compares a given pair of outputs,
+ and returns true if they match; and `output_difference`, which
+ returns a string describing the differences between two outputs.
+ """
+ def _toAscii(self, s):
+ """
+ Convert string to hex-escaped ASCII string.
+ """
+ return str(s.encode('ASCII', 'backslashreplace'), "ASCII")
+
+ def check_output(self, want, got, optionflags):
+ """
+ Return True iff the actual output from an example (`got`)
+ matches the expected output (`want`). These strings are
+ always considered to match if they are identical; but
+ depending on what option flags the test runner is using,
+ several non-exact match types are also possible. See the
+ documentation for `TestRunner` for more information about
+ option flags.
+ """
+
+ # If `want` contains hex-escaped character such as "\u1234",
+ # then `want` is a string of six characters(e.g. [\,u,1,2,3,4]).
+ # On the other hand, `got` could be another sequence of
+ # characters such as [\u1234], so `want` and `got` should
+ # be folded to hex-escaped ASCII string to compare.
+ got = self._toAscii(got)
+ want = self._toAscii(want)
+
+ # Handle the common case first, for efficiency:
+ # if they're string-identical, always return true.
+ if got == want:
+ return True
+
+ # The values True and False replaced 1 and 0 as the return
+ # value for boolean comparisons in Python 2.3.
+ if not (optionflags & DONT_ACCEPT_TRUE_FOR_1):
+ if (got,want) == ("True\n", "1\n"):
+ return True
+ if (got,want) == ("False\n", "0\n"):
+ return True
+
+ # can be used as a special sequence to signify a
+ # blank line, unless the DONT_ACCEPT_BLANKLINE flag is used.
+ if not (optionflags & DONT_ACCEPT_BLANKLINE):
+ # Replace in want with a blank line.
+ want = re.sub(r'(?m)^%s\s*?$' % re.escape(BLANKLINE_MARKER),
+ '', want)
+ # If a line in got contains only spaces, then remove the
+ # spaces.
+ got = re.sub(r'(?m)^[^\S\n]+$', '', got)
+ if got == want:
+ return True
+
+ # This flag causes doctest to ignore any differences in the
+ # contents of whitespace strings. Note that this can be used
+ # in conjunction with the ELLIPSIS flag.
+ if optionflags & NORMALIZE_WHITESPACE:
+ got = ' '.join(got.split())
+ want = ' '.join(want.split())
+ if got == want:
+ return True
+
+ # The ELLIPSIS flag says to let the sequence "..." in `want`
+ # match any substring in `got`.
+ if optionflags & ELLIPSIS:
+ if _ellipsis_match(want, got):
+ return True
+
+ # We didn't find any match; return false.
+ return False
+
+ # Should we do a fancy diff?
+ def _do_a_fancy_diff(self, want, got, optionflags):
+ # Not unless they asked for a fancy diff.
+ if not optionflags & (REPORT_UDIFF |
+ REPORT_CDIFF |
+ REPORT_NDIFF):
+ return False
+
+ # If expected output uses ellipsis, a meaningful fancy diff is
+ # too hard ... or maybe not. In two real-life failures Tim saw,
+ # a diff was a major help anyway, so this is commented out.
+ # [todo] _ellipsis_match() knows which pieces do and don't match,
+ # and could be the basis for a kick-ass diff in this case.
+ ##if optionflags & ELLIPSIS and ELLIPSIS_MARKER in want:
+ ## return False
+
+ # ndiff does intraline difference marking, so can be useful even
+ # for 1-line differences.
+ if optionflags & REPORT_NDIFF:
+ return True
+
+ # The other diff types need at least a few lines to be helpful.
+ return want.count('\n') > 2 and got.count('\n') > 2
+
+ def output_difference(self, example, got, optionflags):
+ """
+ Return a string describing the differences between the
+ expected output for a given example (`example`) and the actual
+ output (`got`). `optionflags` is the set of option flags used
+ to compare `want` and `got`.
+ """
+ want = example.want
+ # If s are being used, then replace blank lines
+ # with in the actual output string.
+ if not (optionflags & DONT_ACCEPT_BLANKLINE):
+ got = re.sub('(?m)^[ ]*(?=\n)', BLANKLINE_MARKER, got)
+
+ # Check if we should use diff.
+ if self._do_a_fancy_diff(want, got, optionflags):
+ # Split want & got into lines.
+ want_lines = want.splitlines(keepends=True)
+ got_lines = got.splitlines(keepends=True)
+ # Use difflib to find their differences.
+ if optionflags & REPORT_UDIFF:
+ diff = difflib.unified_diff(want_lines, got_lines, n=2)
+ diff = list(diff)[2:] # strip the diff header
+ kind = 'unified diff with -expected +actual'
+ elif optionflags & REPORT_CDIFF:
+ diff = difflib.context_diff(want_lines, got_lines, n=2)
+ diff = list(diff)[2:] # strip the diff header
+ kind = 'context diff with expected followed by actual'
+ elif optionflags & REPORT_NDIFF:
+ engine = difflib.Differ(charjunk=difflib.IS_CHARACTER_JUNK)
+ diff = list(engine.compare(want_lines, got_lines))
+ kind = 'ndiff with -expected +actual'
+ else:
+ assert 0, 'Bad diff option'
+ return 'Differences (%s):\n' % kind + _indent(''.join(diff))
+
+ # If we're not using diff, then simply list the expected
+ # output followed by the actual output.
+ if want and got:
+ return 'Expected:\n%sGot:\n%s' % (_indent(want), _indent(got))
+ elif want:
+ return 'Expected:\n%sGot nothing\n' % _indent(want)
+ elif got:
+ return 'Expected nothing\nGot:\n%s' % _indent(got)
+ else:
+ return 'Expected nothing\nGot nothing\n'
+
+class DocTestFailure(Exception):
+ """A DocTest example has failed in debugging mode.
+
+ The exception instance has variables:
+
+ - test: the DocTest object being run
+
+ - example: the Example object that failed
+
+ - got: the actual output
+ """
+ def __init__(self, test, example, got):
+ self.test = test
+ self.example = example
+ self.got = got
+
+ def __str__(self):
+ return str(self.test)
+
+class UnexpectedException(Exception):
+ """A DocTest example has encountered an unexpected exception
+
+ The exception instance has variables:
+
+ - test: the DocTest object being run
+
+ - example: the Example object that failed
+
+ - exc_info: the exception info
+ """
+ def __init__(self, test, example, exc_info):
+ self.test = test
+ self.example = example
+ self.exc_info = exc_info
+
+ def __str__(self):
+ return str(self.test)
+
+class DebugRunner(DocTestRunner):
+ r"""Run doc tests but raise an exception as soon as there is a failure.
+
+ If an unexpected exception occurs, an UnexpectedException is raised.
+ It contains the test, the example, and the original exception:
+
+ >>> runner = DebugRunner(verbose=False)
+ >>> test = DocTestParser().get_doctest('>>> raise KeyError\n42',
+ ... {}, 'foo', 'foo.py', 0)
+ >>> try:
+ ... runner.run(test)
+ ... except UnexpectedException as f:
+ ... failure = f
+
+ >>> failure.test is test
+ True
+
+ >>> failure.example.want
+ '42\n'
+
+ >>> exc_info = failure.exc_info
+ >>> raise exc_info[1] # Already has the traceback
+ Traceback (most recent call last):
+ ...
+ KeyError
+
+ We wrap the original exception to give the calling application
+ access to the test and example information.
+
+ If the output doesn't match, then a DocTestFailure is raised:
+
+ >>> test = DocTestParser().get_doctest('''
+ ... >>> x = 1
+ ... >>> x
+ ... 2
+ ... ''', {}, 'foo', 'foo.py', 0)
+
+ >>> try:
+ ... runner.run(test)
+ ... except DocTestFailure as f:
+ ... failure = f
+
+ DocTestFailure objects provide access to the test:
+
+ >>> failure.test is test
+ True
+
+ As well as to the example:
+
+ >>> failure.example.want
+ '2\n'
+
+ and the actual output:
+
+ >>> failure.got
+ '1\n'
+
+ If a failure or error occurs, the globals are left intact:
+
+ >>> del test.globs['__builtins__']
+ >>> test.globs
+ {'x': 1}
+
+ >>> test = DocTestParser().get_doctest('''
+ ... >>> x = 2
+ ... >>> raise KeyError
+ ... ''', {}, 'foo', 'foo.py', 0)
+
+ >>> runner.run(test)
+ Traceback (most recent call last):
+ ...
+ doctest.UnexpectedException:
+
+ >>> del test.globs['__builtins__']
+ >>> test.globs
+ {'x': 2}
+
+ But the globals are cleared if there is no error:
+
+ >>> test = DocTestParser().get_doctest('''
+ ... >>> x = 2
+ ... ''', {}, 'foo', 'foo.py', 0)
+
+ >>> runner.run(test)
+ TestResults(failed=0, attempted=1)
+
+ >>> test.globs
+ {}
+
+ """
+
+ def run(self, test, compileflags=None, out=None, clear_globs=True):
+ r = DocTestRunner.run(self, test, compileflags, out, False)
+ if clear_globs:
+ test.globs.clear()
+ return r
+
+ def report_unexpected_exception(self, out, test, example, exc_info):
+ raise UnexpectedException(test, example, exc_info)
+
+ def report_failure(self, out, test, example, got):
+ raise DocTestFailure(test, example, got)
+
+######################################################################
+## 6. Test Functions
+######################################################################
+# These should be backwards compatible.
+
+# For backward compatibility, a global instance of a DocTestRunner
+# class, updated by testmod.
+master = None
+
+def testmod(m=None, name=None, globs=None, verbose=None,
+ report=True, optionflags=0, extraglobs=None,
+ raise_on_error=False, exclude_empty=False):
+ """m=None, name=None, globs=None, verbose=None, report=True,
+ optionflags=0, extraglobs=None, raise_on_error=False,
+ exclude_empty=False
+
+ Test examples in docstrings in functions and classes reachable
+ from module m (or the current module if m is not supplied), starting
+ with m.__doc__.
+
+ Also test examples reachable from dict m.__test__ if it exists and is
+ not None. m.__test__ maps names to functions, classes and strings;
+ function and class docstrings are tested even if the name is private;
+ strings are tested directly, as if they were docstrings.
+
+ Return (#failures, #tests).
+
+ See help(doctest) for an overview.
+
+ Optional keyword arg "name" gives the name of the module; by default
+ use m.__name__.
+
+ Optional keyword arg "globs" gives a dict to be used as the globals
+ when executing examples; by default, use m.__dict__. A copy of this
+ dict is actually used for each docstring, so that each docstring's
+ examples start with a clean slate.
+
+ Optional keyword arg "extraglobs" gives a dictionary that should be
+ merged into the globals that are used to execute examples. By
+ default, no extra globals are used. This is new in 2.4.
+
+ Optional keyword arg "verbose" prints lots of stuff if true, prints
+ only failures if false; by default, it's true iff "-v" is in sys.argv.
+
+ Optional keyword arg "report" prints a summary at the end when true,
+ else prints nothing at the end. In verbose mode, the summary is
+ detailed, else very brief (in fact, empty if all tests passed).
+
+ Optional keyword arg "optionflags" or's together module constants,
+ and defaults to 0. This is new in 2.3. Possible values (see the
+ docs for details):
+
+ DONT_ACCEPT_TRUE_FOR_1
+ DONT_ACCEPT_BLANKLINE
+ NORMALIZE_WHITESPACE
+ ELLIPSIS
+ SKIP
+ IGNORE_EXCEPTION_DETAIL
+ REPORT_UDIFF
+ REPORT_CDIFF
+ REPORT_NDIFF
+ REPORT_ONLY_FIRST_FAILURE
+
+ Optional keyword arg "raise_on_error" raises an exception on the
+ first unexpected exception or failure. This allows failures to be
+ post-mortem debugged.
+
+ Advanced tomfoolery: testmod runs methods of a local instance of
+ class doctest.Tester, then merges the results into (or creates)
+ global Tester instance doctest.master. Methods of doctest.master
+ can be called directly too, if you want to do something unusual.
+ Passing report=0 to testmod is especially useful then, to delay
+ displaying a summary. Invoke doctest.master.summarize(verbose)
+ when you're done fiddling.
+ """
+ global master
+
+ # If no module was given, then use __main__.
+ if m is None:
+ # DWA - m will still be None if this wasn't invoked from the command
+ # line, in which case the following TypeError is about as good an error
+ # as we should expect
+ m = sys.modules.get('__main__')
+
+ # Check that we were actually given a module.
+ if not inspect.ismodule(m):
+ raise TypeError("testmod: module required; %r" % (m,))
+
+ # If no name was given, then use the module's name.
+ if name is None:
+ name = m.__name__
+
+ # Find, parse, and run all tests in the given module.
+ finder = DocTestFinder(exclude_empty=exclude_empty)
+
+ if raise_on_error:
+ runner = DebugRunner(verbose=verbose, optionflags=optionflags)
+ else:
+ runner = DocTestRunner(verbose=verbose, optionflags=optionflags)
+
+ for test in finder.find(m, name, globs=globs, extraglobs=extraglobs):
+ runner.run(test)
+
+ if report:
+ runner.summarize()
+
+ if master is None:
+ master = runner
+ else:
+ master.merge(runner)
+
+ return TestResults(runner.failures, runner.tries)
+
+def testfile(filename, module_relative=True, name=None, package=None,
+ globs=None, verbose=None, report=True, optionflags=0,
+ extraglobs=None, raise_on_error=False, parser=DocTestParser(),
+ encoding=None):
+ """
+ Test examples in the given file. Return (#failures, #tests).
+
+ Optional keyword arg "module_relative" specifies how filenames
+ should be interpreted:
+
+ - If "module_relative" is True (the default), then "filename"
+ specifies a module-relative path. By default, this path is
+ relative to the calling module's directory; but if the
+ "package" argument is specified, then it is relative to that
+ package. To ensure os-independence, "filename" should use
+ "/" characters to separate path segments, and should not
+ be an absolute path (i.e., it may not begin with "/").
+
+ - If "module_relative" is False, then "filename" specifies an
+ os-specific path. The path may be absolute or relative (to
+ the current working directory).
+
+ Optional keyword arg "name" gives the name of the test; by default
+ use the file's basename.
+
+ Optional keyword argument "package" is a Python package or the
+ name of a Python package whose directory should be used as the
+ base directory for a module relative filename. If no package is
+ specified, then the calling module's directory is used as the base
+ directory for module relative filenames. It is an error to
+ specify "package" if "module_relative" is False.
+
+ Optional keyword arg "globs" gives a dict to be used as the globals
+ when executing examples; by default, use {}. A copy of this dict
+ is actually used for each docstring, so that each docstring's
+ examples start with a clean slate.
+
+ Optional keyword arg "extraglobs" gives a dictionary that should be
+ merged into the globals that are used to execute examples. By
+ default, no extra globals are used.
+
+ Optional keyword arg "verbose" prints lots of stuff if true, prints
+ only failures if false; by default, it's true iff "-v" is in sys.argv.
+
+ Optional keyword arg "report" prints a summary at the end when true,
+ else prints nothing at the end. In verbose mode, the summary is
+ detailed, else very brief (in fact, empty if all tests passed).
+
+ Optional keyword arg "optionflags" or's together module constants,
+ and defaults to 0. Possible values (see the docs for details):
+
+ DONT_ACCEPT_TRUE_FOR_1
+ DONT_ACCEPT_BLANKLINE
+ NORMALIZE_WHITESPACE
+ ELLIPSIS
+ SKIP
+ IGNORE_EXCEPTION_DETAIL
+ REPORT_UDIFF
+ REPORT_CDIFF
+ REPORT_NDIFF
+ REPORT_ONLY_FIRST_FAILURE
+
+ Optional keyword arg "raise_on_error" raises an exception on the
+ first unexpected exception or failure. This allows failures to be
+ post-mortem debugged.
+
+ Optional keyword arg "parser" specifies a DocTestParser (or
+ subclass) that should be used to extract tests from the files.
+
+ Optional keyword arg "encoding" specifies an encoding that should
+ be used to convert the file to unicode.
+
+ Advanced tomfoolery: testmod runs methods of a local instance of
+ class doctest.Tester, then merges the results into (or creates)
+ global Tester instance doctest.master. Methods of doctest.master
+ can be called directly too, if you want to do something unusual.
+ Passing report=0 to testmod is especially useful then, to delay
+ displaying a summary. Invoke doctest.master.summarize(verbose)
+ when you're done fiddling.
+ """
+ global master
+
+ if package and not module_relative:
+ raise ValueError("Package may only be specified for module-"
+ "relative paths.")
+
+ # Relativize the path
+ text, filename = _load_testfile(filename, package, module_relative,
+ encoding or "utf-8")
+
+ # If no name was given, then use the file's name.
+ if name is None:
+ name = os.path.basename(filename)
+
+ # Assemble the globals.
+ if globs is None:
+ globs = {}
+ else:
+ globs = globs.copy()
+ if extraglobs is not None:
+ globs.update(extraglobs)
+ if '__name__' not in globs:
+ globs['__name__'] = '__main__'
+
+ if raise_on_error:
+ runner = DebugRunner(verbose=verbose, optionflags=optionflags)
+ else:
+ runner = DocTestRunner(verbose=verbose, optionflags=optionflags)
+
+ # Read the file, convert it to a test, and run it.
+ test = parser.get_doctest(text, globs, name, filename, 0)
+ runner.run(test)
+
+ if report:
+ runner.summarize()
+
+ if master is None:
+ master = runner
+ else:
+ master.merge(runner)
+
+ return TestResults(runner.failures, runner.tries)
+
+def run_docstring_examples(f, globs, verbose=False, name="NoName",
+ compileflags=None, optionflags=0):
+ """
+ Test examples in the given object's docstring (`f`), using `globs`
+ as globals. Optional argument `name` is used in failure messages.
+ If the optional argument `verbose` is true, then generate output
+ even if there are no failures.
+
+ `compileflags` gives the set of flags that should be used by the
+ Python compiler when running the examples. If not specified, then
+ it will default to the set of future-import flags that apply to
+ `globs`.
+
+ Optional keyword arg `optionflags` specifies options for the
+ testing and output. See the documentation for `testmod` for more
+ information.
+ """
+ # Find, parse, and run all tests in the given module.
+ finder = DocTestFinder(verbose=verbose, recurse=False)
+ runner = DocTestRunner(verbose=verbose, optionflags=optionflags)
+ for test in finder.find(f, name, globs=globs):
+ runner.run(test, compileflags=compileflags)
+
+######################################################################
+## 7. Unittest Support
+######################################################################
+
+_unittest_reportflags = 0
+
+def set_unittest_reportflags(flags):
+ """Sets the unittest option flags.
+
+ The old flag is returned so that a runner could restore the old
+ value if it wished to:
+
+ >>> import doctest
+ >>> old = doctest._unittest_reportflags
+ >>> doctest.set_unittest_reportflags(REPORT_NDIFF |
+ ... REPORT_ONLY_FIRST_FAILURE) == old
+ True
+
+ >>> doctest._unittest_reportflags == (REPORT_NDIFF |
+ ... REPORT_ONLY_FIRST_FAILURE)
+ True
+
+ Only reporting flags can be set:
+
+ >>> doctest.set_unittest_reportflags(ELLIPSIS)
+ Traceback (most recent call last):
+ ...
+ ValueError: ('Only reporting flags allowed', 8)
+
+ >>> doctest.set_unittest_reportflags(old) == (REPORT_NDIFF |
+ ... REPORT_ONLY_FIRST_FAILURE)
+ True
+ """
+ global _unittest_reportflags
+
+ if (flags & REPORTING_FLAGS) != flags:
+ raise ValueError("Only reporting flags allowed", flags)
+ old = _unittest_reportflags
+ _unittest_reportflags = flags
+ return old
+
+
+class DocTestCase(unittest.TestCase):
+
+ def __init__(self, test, optionflags=0, setUp=None, tearDown=None,
+ checker=None):
+
+ unittest.TestCase.__init__(self)
+ self._dt_optionflags = optionflags
+ self._dt_checker = checker
+ self._dt_test = test
+ self._dt_setUp = setUp
+ self._dt_tearDown = tearDown
+
+ def setUp(self):
+ test = self._dt_test
+
+ if self._dt_setUp is not None:
+ self._dt_setUp(test)
+
+ def tearDown(self):
+ test = self._dt_test
+
+ if self._dt_tearDown is not None:
+ self._dt_tearDown(test)
+
+ test.globs.clear()
+
+ def runTest(self):
+ test = self._dt_test
+ old = sys.stdout
+ new = StringIO()
+ optionflags = self._dt_optionflags
+
+ if not (optionflags & REPORTING_FLAGS):
+ # The option flags don't include any reporting flags,
+ # so add the default reporting flags
+ optionflags |= _unittest_reportflags
+
+ runner = DocTestRunner(optionflags=optionflags,
+ checker=self._dt_checker, verbose=False)
+
+ try:
+ runner.DIVIDER = "-"*70
+ failures, tries = runner.run(
+ test, out=new.write, clear_globs=False)
+ finally:
+ sys.stdout = old
+
+ if failures:
+ raise self.failureException(self.format_failure(new.getvalue()))
+
+ def format_failure(self, err):
+ test = self._dt_test
+ if test.lineno is None:
+ lineno = 'unknown line number'
+ else:
+ lineno = '%s' % test.lineno
+ lname = '.'.join(test.name.split('.')[-1:])
+ return ('Failed doctest test for %s\n'
+ ' File "%s", line %s, in %s\n\n%s'
+ % (test.name, test.filename, lineno, lname, err)
+ )
+
+ def debug(self):
+ r"""Run the test case without results and without catching exceptions
+
+ The unit test framework includes a debug method on test cases
+ and test suites to support post-mortem debugging. The test code
+ is run in such a way that errors are not caught. This way a
+ caller can catch the errors and initiate post-mortem debugging.
+
+ The DocTestCase provides a debug method that raises
+ UnexpectedException errors if there is an unexpected
+ exception:
+
+ >>> test = DocTestParser().get_doctest('>>> raise KeyError\n42',
+ ... {}, 'foo', 'foo.py', 0)
+ >>> case = DocTestCase(test)
+ >>> try:
+ ... case.debug()
+ ... except UnexpectedException as f:
+ ... failure = f
+
+ The UnexpectedException contains the test, the example, and
+ the original exception:
+
+ >>> failure.test is test
+ True
+
+ >>> failure.example.want
+ '42\n'
+
+ >>> exc_info = failure.exc_info
+ >>> raise exc_info[1] # Already has the traceback
+ Traceback (most recent call last):
+ ...
+ KeyError
+
+ If the output doesn't match, then a DocTestFailure is raised:
+
+ >>> test = DocTestParser().get_doctest('''
+ ... >>> x = 1
+ ... >>> x
+ ... 2
+ ... ''', {}, 'foo', 'foo.py', 0)
+ >>> case = DocTestCase(test)
+
+ >>> try:
+ ... case.debug()
+ ... except DocTestFailure as f:
+ ... failure = f
+
+ DocTestFailure objects provide access to the test:
+
+ >>> failure.test is test
+ True
+
+ As well as to the example:
+
+ >>> failure.example.want
+ '2\n'
+
+ and the actual output:
+
+ >>> failure.got
+ '1\n'
+
+ """
+
+ self.setUp()
+ runner = DebugRunner(optionflags=self._dt_optionflags,
+ checker=self._dt_checker, verbose=False)
+ runner.run(self._dt_test, clear_globs=False)
+ self.tearDown()
+
+ def id(self):
+ return self._dt_test.name
+
+ def __eq__(self, other):
+ if type(self) is not type(other):
+ return NotImplemented
+
+ return self._dt_test == other._dt_test and \
+ self._dt_optionflags == other._dt_optionflags and \
+ self._dt_setUp == other._dt_setUp and \
+ self._dt_tearDown == other._dt_tearDown and \
+ self._dt_checker == other._dt_checker
+
+ def __hash__(self):
+ return hash((self._dt_optionflags, self._dt_setUp, self._dt_tearDown,
+ self._dt_checker))
+
+ def __repr__(self):
+ name = self._dt_test.name.split('.')
+ return "%s (%s)" % (name[-1], '.'.join(name[:-1]))
+
+ __str__ = object.__str__
+
+ def shortDescription(self):
+ return "Doctest: " + self._dt_test.name
+
+class SkipDocTestCase(DocTestCase):
+ def __init__(self, module):
+ self.module = module
+ DocTestCase.__init__(self, None)
+
+ def setUp(self):
+ self.skipTest("DocTestSuite will not work with -O2 and above")
+
+ def test_skip(self):
+ pass
+
+ def shortDescription(self):
+ return "Skipping tests from %s" % self.module.__name__
+
+ __str__ = shortDescription
+
+
+class _DocTestSuite(unittest.TestSuite):
+
+ def _removeTestAtIndex(self, index):
+ pass
+
+
+def DocTestSuite(module=None, globs=None, extraglobs=None, test_finder=None,
+ **options):
+ """
+ Convert doctest tests for a module to a unittest test suite.
+
+ This converts each documentation string in a module that
+ contains doctest tests to a unittest test case. If any of the
+ tests in a doc string fail, then the test case fails. An exception
+ is raised showing the name of the file containing the test and a
+ (sometimes approximate) line number.
+
+ The `module` argument provides the module to be tested. The argument
+ can be either a module or a module name.
+
+ If no argument is given, the calling module is used.
+
+ A number of options may be provided as keyword arguments:
+
+ setUp
+ A set-up function. This is called before running the
+ tests in each file. The setUp function will be passed a DocTest
+ object. The setUp function can access the test globals as the
+ globs attribute of the test passed.
+
+ tearDown
+ A tear-down function. This is called after running the
+ tests in each file. The tearDown function will be passed a DocTest
+ object. The tearDown function can access the test globals as the
+ globs attribute of the test passed.
+
+ globs
+ A dictionary containing initial global variables for the tests.
+
+ optionflags
+ A set of doctest option flags expressed as an integer.
+ """
+
+ if test_finder is None:
+ test_finder = DocTestFinder()
+
+ module = _normalize_module(module)
+ tests = test_finder.find(module, globs=globs, extraglobs=extraglobs)
+
+ if not tests and sys.flags.optimize >=2:
+ # Skip doctests when running with -O2
+ suite = _DocTestSuite()
+ suite.addTest(SkipDocTestCase(module))
+ return suite
+
+ tests.sort()
+ suite = _DocTestSuite()
+
+ for test in tests:
+ if len(test.examples) == 0:
+ continue
+ if not test.filename:
+ filename = module.__file__
+ if filename[-4:] == ".pyc":
+ filename = filename[:-1]
+ test.filename = filename
+ suite.addTest(DocTestCase(test, **options))
+
+ return suite
+
+class DocFileCase(DocTestCase):
+
+ def id(self):
+ return '_'.join(self._dt_test.name.split('.'))
+
+ def __repr__(self):
+ return self._dt_test.filename
+
+ def format_failure(self, err):
+ return ('Failed doctest test for %s\n File "%s", line 0\n\n%s'
+ % (self._dt_test.name, self._dt_test.filename, err)
+ )
+
+def DocFileTest(path, module_relative=True, package=None,
+ globs=None, parser=DocTestParser(),
+ encoding=None, **options):
+ if globs is None:
+ globs = {}
+ else:
+ globs = globs.copy()
+
+ if package and not module_relative:
+ raise ValueError("Package may only be specified for module-"
+ "relative paths.")
+
+ # Relativize the path.
+ doc, path = _load_testfile(path, package, module_relative,
+ encoding or "utf-8")
+
+ if "__file__" not in globs:
+ globs["__file__"] = path
+
+ # Find the file and read it.
+ name = os.path.basename(path)
+
+ # Convert it to a test, and wrap it in a DocFileCase.
+ test = parser.get_doctest(doc, globs, name, path, 0)
+ return DocFileCase(test, **options)
+
+def DocFileSuite(*paths, **kw):
+ """A unittest suite for one or more doctest files.
+
+ The path to each doctest file is given as a string; the
+ interpretation of that string depends on the keyword argument
+ "module_relative".
+
+ A number of options may be provided as keyword arguments:
+
+ module_relative
+ If "module_relative" is True, then the given file paths are
+ interpreted as os-independent module-relative paths. By
+ default, these paths are relative to the calling module's
+ directory; but if the "package" argument is specified, then
+ they are relative to that package. To ensure os-independence,
+ "filename" should use "/" characters to separate path
+ segments, and may not be an absolute path (i.e., it may not
+ begin with "/").
+
+ If "module_relative" is False, then the given file paths are
+ interpreted as os-specific paths. These paths may be absolute
+ or relative (to the current working directory).
+
+ package
+ A Python package or the name of a Python package whose directory
+ should be used as the base directory for module relative paths.
+ If "package" is not specified, then the calling module's
+ directory is used as the base directory for module relative
+ filenames. It is an error to specify "package" if
+ "module_relative" is False.
+
+ setUp
+ A set-up function. This is called before running the
+ tests in each file. The setUp function will be passed a DocTest
+ object. The setUp function can access the test globals as the
+ globs attribute of the test passed.
+
+ tearDown
+ A tear-down function. This is called after running the
+ tests in each file. The tearDown function will be passed a DocTest
+ object. The tearDown function can access the test globals as the
+ globs attribute of the test passed.
+
+ globs
+ A dictionary containing initial global variables for the tests.
+
+ optionflags
+ A set of doctest option flags expressed as an integer.
+
+ parser
+ A DocTestParser (or subclass) that should be used to extract
+ tests from the files.
+
+ encoding
+ An encoding that will be used to convert the files to unicode.
+ """
+ suite = _DocTestSuite()
+
+ # We do this here so that _normalize_module is called at the right
+ # level. If it were called in DocFileTest, then this function
+ # would be the caller and we might guess the package incorrectly.
+ if kw.get('module_relative', True):
+ kw['package'] = _normalize_module(kw.get('package'))
+
+ for path in paths:
+ suite.addTest(DocFileTest(path, **kw))
+
+ return suite
+
+######################################################################
+## 8. Debugging Support
+######################################################################
+
+def script_from_examples(s):
+ r"""Extract script from text with examples.
+
+ Converts text with examples to a Python script. Example input is
+ converted to regular code. Example output and all other words
+ are converted to comments:
+
+ >>> text = '''
+ ... Here are examples of simple math.
+ ...
+ ... Python has super accurate integer addition
+ ...
+ ... >>> 2 + 2
+ ... 5
+ ...
+ ... And very friendly error messages:
+ ...
+ ... >>> 1/0
+ ... To Infinity
+ ... And
+ ... Beyond
+ ...
+ ... You can use logic if you want:
+ ...
+ ... >>> if 0:
+ ... ... blah
+ ... ... blah
+ ... ...
+ ...
+ ... Ho hum
+ ... '''
+
+ >>> print(script_from_examples(text))
+ # Here are examples of simple math.
+ #
+ # Python has super accurate integer addition
+ #
+ 2 + 2
+ # Expected:
+ ## 5
+ #
+ # And very friendly error messages:
+ #
+ 1/0
+ # Expected:
+ ## To Infinity
+ ## And
+ ## Beyond
+ #
+ # You can use logic if you want:
+ #
+ if 0:
+ blah
+ blah
+ #
+ # Ho hum
+
+ """
+ output = []
+ for piece in DocTestParser().parse(s):
+ if isinstance(piece, Example):
+ # Add the example's source code (strip trailing NL)
+ output.append(piece.source[:-1])
+ # Add the expected output:
+ want = piece.want
+ if want:
+ output.append('# Expected:')
+ output += ['## '+l for l in want.split('\n')[:-1]]
+ else:
+ # Add non-example text.
+ output += [_comment_line(l)
+ for l in piece.split('\n')[:-1]]
+
+ # Trim junk on both ends.
+ while output and output[-1] == '#':
+ output.pop()
+ while output and output[0] == '#':
+ output.pop(0)
+ # Combine the output, and return it.
+ # Add a courtesy newline to prevent exec from choking (see bug #1172785)
+ return '\n'.join(output) + '\n'
+
+def testsource(module, name):
+ """Extract the test sources from a doctest docstring as a script.
+
+ Provide the module (or dotted name of the module) containing the
+ test to be debugged and the name (within the module) of the object
+ with the doc string with tests to be debugged.
+ """
+ module = _normalize_module(module)
+ tests = DocTestFinder().find(module)
+ test = [t for t in tests if t.name == name]
+ if not test:
+ raise ValueError(name, "not found in tests")
+ test = test[0]
+ testsrc = script_from_examples(test.docstring)
+ return testsrc
+
+def debug_src(src, pm=False, globs=None):
+ """Debug a single doctest docstring, in argument `src`'"""
+ testsrc = script_from_examples(src)
+ debug_script(testsrc, pm, globs)
+
+def debug_script(src, pm=False, globs=None):
+ "Debug a test script. `src` is the script, as a string."
+ import pdb
+
+ if globs:
+ globs = globs.copy()
+ else:
+ globs = {}
+
+ if pm:
+ try:
+ exec(src, globs, globs)
+ except:
+ print(sys.exc_info()[1])
+ p = pdb.Pdb(nosigint=True)
+ p.reset()
+ p.interaction(None, sys.exc_info()[2])
+ else:
+ pdb.Pdb(nosigint=True).run("exec(%r)" % src, globs, globs)
+
+def debug(module, name, pm=False):
+ """Debug a single doctest docstring.
+
+ Provide the module (or dotted name of the module) containing the
+ test to be debugged and the name (within the module) of the object
+ with the docstring with tests to be debugged.
+ """
+ module = _normalize_module(module)
+ testsrc = testsource(module, name)
+ debug_script(testsrc, pm, module.__dict__)
+
+######################################################################
+## 9. Example Usage
+######################################################################
+class _TestClass:
+ """
+ A pointless class, for sanity-checking of docstring testing.
+
+ Methods:
+ square()
+ get()
+
+ >>> _TestClass(13).get() + _TestClass(-12).get()
+ 1
+ >>> hex(_TestClass(13).square().get())
+ '0xa9'
+ """
+
+ def __init__(self, val):
+ """val -> _TestClass object with associated value val.
+
+ >>> t = _TestClass(123)
+ >>> print(t.get())
+ 123
+ """
+
+ self.val = val
+
+ def square(self):
+ """square() -> square TestClass's associated value
+
+ >>> _TestClass(13).square().get()
+ 169
+ """
+
+ self.val = self.val ** 2
+ return self
+
+ def get(self):
+ """get() -> return TestClass's associated value.
+
+ >>> x = _TestClass(-42)
+ >>> print(x.get())
+ -42
+ """
+
+ return self.val
+
+__test__ = {"_TestClass": _TestClass,
+ "string": r"""
+ Example of a string object, searched as-is.
+ >>> x = 1; y = 2
+ >>> x + y, x * y
+ (3, 2)
+ """,
+
+ "bool-int equivalence": r"""
+ In 2.2, boolean expressions displayed
+ 0 or 1. By default, we still accept
+ them. This can be disabled by passing
+ DONT_ACCEPT_TRUE_FOR_1 to the new
+ optionflags argument.
+ >>> 4 == 4
+ 1
+ >>> 4 == 4
+ True
+ >>> 4 > 4
+ 0
+ >>> 4 > 4
+ False
+ """,
+
+ "blank lines": r"""
+ Blank lines can be marked with :
+ >>> print('foo\n\nbar\n')
+ foo
+
+ bar
+
+ """,
+
+ "ellipsis": r"""
+ If the ellipsis flag is used, then '...' can be used to
+ elide substrings in the desired output:
+ >>> print(list(range(1000))) #doctest: +ELLIPSIS
+ [0, 1, 2, ..., 999]
+ """,
+
+ "whitespace normalization": r"""
+ If the whitespace normalization flag is used, then
+ differences in whitespace are ignored.
+ >>> print(list(range(30))) #doctest: +NORMALIZE_WHITESPACE
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
+ 27, 28, 29]
+ """,
+ }
+
+
+def _test():
+ import argparse
+
+ parser = argparse.ArgumentParser(description="doctest runner")
+ parser.add_argument('-v', '--verbose', action='store_true', default=False,
+ help='print very verbose output for all tests')
+ parser.add_argument('-o', '--option', action='append',
+ choices=OPTIONFLAGS_BY_NAME.keys(), default=[],
+ help=('specify a doctest option flag to apply'
+ ' to the test run; may be specified more'
+ ' than once to apply multiple options'))
+ parser.add_argument('-f', '--fail-fast', action='store_true',
+ help=('stop running tests after first failure (this'
+ ' is a shorthand for -o FAIL_FAST, and is'
+ ' in addition to any other -o options)'))
+ parser.add_argument('file', nargs='+',
+ help='file containing the tests to run')
+ args = parser.parse_args()
+ testfiles = args.file
+ # Verbose used to be handled by the "inspect argv" magic in DocTestRunner,
+ # but since we are using argparse we are passing it manually now.
+ verbose = args.verbose
+ options = 0
+ for option in args.option:
+ options |= OPTIONFLAGS_BY_NAME[option]
+ if args.fail_fast:
+ options |= FAIL_FAST
+ for filename in testfiles:
+ if filename.endswith(".py"):
+ # It is a module -- insert its dir into sys.path and try to
+ # import it. If it is part of a package, that possibly
+ # won't work because of package imports.
+ dirname, filename = os.path.split(filename)
+ sys.path.insert(0, dirname)
+ m = __import__(filename[:-3])
+ del sys.path[0]
+ failures, _ = testmod(m, verbose=verbose, optionflags=options)
+ else:
+ failures, _ = testfile(filename, module_relative=False,
+ verbose=verbose, optionflags=options)
+ if failures:
+ return 1
+ return 0
+
+
+if __name__ == "__main__":
+ sys.exit(_test())
diff --git a/dist/lib/dummy_threading.py b/dist/lib/dummy_threading.py
new file mode 100644
index 0000000..1bb7eee
--- /dev/null
+++ b/dist/lib/dummy_threading.py
@@ -0,0 +1,78 @@
+"""Faux ``threading`` version using ``dummy_thread`` instead of ``thread``.
+
+The module ``_dummy_threading`` is added to ``sys.modules`` in order
+to not have ``threading`` considered imported. Had ``threading`` been
+directly imported it would have made all subsequent imports succeed
+regardless of whether ``_thread`` was available which is not desired.
+
+"""
+from sys import modules as sys_modules
+
+import _dummy_thread
+
+# Declaring now so as to not have to nest ``try``s to get proper clean-up.
+holding_thread = False
+holding_threading = False
+holding__threading_local = False
+
+try:
+ # Could have checked if ``_thread`` was not in sys.modules and gone
+ # a different route, but decided to mirror technique used with
+ # ``threading`` below.
+ if '_thread' in sys_modules:
+ held_thread = sys_modules['_thread']
+ holding_thread = True
+ # Must have some module named ``_thread`` that implements its API
+ # in order to initially import ``threading``.
+ sys_modules['_thread'] = sys_modules['_dummy_thread']
+
+ if 'threading' in sys_modules:
+ # If ``threading`` is already imported, might as well prevent
+ # trying to import it more than needed by saving it if it is
+ # already imported before deleting it.
+ held_threading = sys_modules['threading']
+ holding_threading = True
+ del sys_modules['threading']
+
+ if '_threading_local' in sys_modules:
+ # If ``_threading_local`` is already imported, might as well prevent
+ # trying to import it more than needed by saving it if it is
+ # already imported before deleting it.
+ held__threading_local = sys_modules['_threading_local']
+ holding__threading_local = True
+ del sys_modules['_threading_local']
+
+ import threading
+ # Need a copy of the code kept somewhere...
+ sys_modules['_dummy_threading'] = sys_modules['threading']
+ del sys_modules['threading']
+ sys_modules['_dummy__threading_local'] = sys_modules['_threading_local']
+ del sys_modules['_threading_local']
+ from _dummy_threading import *
+ from _dummy_threading import __all__
+
+finally:
+ # Put back ``threading`` if we overwrote earlier
+
+ if holding_threading:
+ sys_modules['threading'] = held_threading
+ del held_threading
+ del holding_threading
+
+ # Put back ``_threading_local`` if we overwrote earlier
+
+ if holding__threading_local:
+ sys_modules['_threading_local'] = held__threading_local
+ del held__threading_local
+ del holding__threading_local
+
+ # Put back ``thread`` if we overwrote, else del the entry we made
+ if holding_thread:
+ sys_modules['_thread'] = held_thread
+ del held_thread
+ else:
+ del sys_modules['_thread']
+ del holding_thread
+
+ del _dummy_thread
+ del sys_modules
diff --git a/dist/lib/email/__init__.py b/dist/lib/email/__init__.py
new file mode 100644
index 0000000..fae8724
--- /dev/null
+++ b/dist/lib/email/__init__.py
@@ -0,0 +1,62 @@
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""A package for parsing, handling, and generating email messages."""
+
+__all__ = [
+ 'base64mime',
+ 'charset',
+ 'encoders',
+ 'errors',
+ 'feedparser',
+ 'generator',
+ 'header',
+ 'iterators',
+ 'message',
+ 'message_from_file',
+ 'message_from_binary_file',
+ 'message_from_string',
+ 'message_from_bytes',
+ 'mime',
+ 'parser',
+ 'quoprimime',
+ 'utils',
+ ]
+
+
+
+# Some convenience routines. Don't import Parser and Message as side-effects
+# of importing email since those cascadingly import most of the rest of the
+# email package.
+def message_from_string(s, *args, **kws):
+ """Parse a string into a Message object model.
+
+ Optional _class and strict are passed to the Parser constructor.
+ """
+ from email.parser import Parser
+ return Parser(*args, **kws).parsestr(s)
+
+def message_from_bytes(s, *args, **kws):
+ """Parse a bytes string into a Message object model.
+
+ Optional _class and strict are passed to the Parser constructor.
+ """
+ from email.parser import BytesParser
+ return BytesParser(*args, **kws).parsebytes(s)
+
+def message_from_file(fp, *args, **kws):
+ """Read a file and parse its contents into a Message object model.
+
+ Optional _class and strict are passed to the Parser constructor.
+ """
+ from email.parser import Parser
+ return Parser(*args, **kws).parse(fp)
+
+def message_from_binary_file(fp, *args, **kws):
+ """Read a binary file and parse its contents into a Message object model.
+
+ Optional _class and strict are passed to the Parser constructor.
+ """
+ from email.parser import BytesParser
+ return BytesParser(*args, **kws).parse(fp)
diff --git a/dist/lib/email/__pycache__/__init__.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/__init__.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..bac7d99
Binary files /dev/null and b/dist/lib/email/__pycache__/__init__.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/__pycache__/_encoded_words.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/_encoded_words.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..c03d440
Binary files /dev/null and b/dist/lib/email/__pycache__/_encoded_words.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/__pycache__/_header_value_parser.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/_header_value_parser.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..0a4454c
Binary files /dev/null and b/dist/lib/email/__pycache__/_header_value_parser.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/__pycache__/_parseaddr.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/_parseaddr.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..c156549
Binary files /dev/null and b/dist/lib/email/__pycache__/_parseaddr.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/__pycache__/_policybase.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/_policybase.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..bf6c183
Binary files /dev/null and b/dist/lib/email/__pycache__/_policybase.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/__pycache__/base64mime.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/base64mime.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..94d5396
Binary files /dev/null and b/dist/lib/email/__pycache__/base64mime.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/__pycache__/charset.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/charset.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..f54350e
Binary files /dev/null and b/dist/lib/email/__pycache__/charset.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/__pycache__/contentmanager.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/contentmanager.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..93c4e02
Binary files /dev/null and b/dist/lib/email/__pycache__/contentmanager.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/__pycache__/encoders.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/encoders.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..6c7e470
Binary files /dev/null and b/dist/lib/email/__pycache__/encoders.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/__pycache__/errors.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/errors.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..5c44950
Binary files /dev/null and b/dist/lib/email/__pycache__/errors.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/__pycache__/feedparser.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/feedparser.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..958ebbe
Binary files /dev/null and b/dist/lib/email/__pycache__/feedparser.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/__pycache__/generator.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/generator.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..5cde304
Binary files /dev/null and b/dist/lib/email/__pycache__/generator.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/__pycache__/header.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/header.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..1ac1f07
Binary files /dev/null and b/dist/lib/email/__pycache__/header.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/__pycache__/headerregistry.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/headerregistry.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..760e687
Binary files /dev/null and b/dist/lib/email/__pycache__/headerregistry.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/__pycache__/iterators.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/iterators.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..244f103
Binary files /dev/null and b/dist/lib/email/__pycache__/iterators.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/__pycache__/message.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/message.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..25105c5
Binary files /dev/null and b/dist/lib/email/__pycache__/message.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/__pycache__/parser.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/parser.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..2ca864b
Binary files /dev/null and b/dist/lib/email/__pycache__/parser.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/__pycache__/policy.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/policy.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..e602ea5
Binary files /dev/null and b/dist/lib/email/__pycache__/policy.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/__pycache__/quoprimime.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/quoprimime.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..dec1328
Binary files /dev/null and b/dist/lib/email/__pycache__/quoprimime.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/__pycache__/utils.cpython-38.opt-1.pyc b/dist/lib/email/__pycache__/utils.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..1b6dc20
Binary files /dev/null and b/dist/lib/email/__pycache__/utils.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/_encoded_words.py b/dist/lib/email/_encoded_words.py
new file mode 100644
index 0000000..295ae7e
--- /dev/null
+++ b/dist/lib/email/_encoded_words.py
@@ -0,0 +1,233 @@
+""" Routines for manipulating RFC2047 encoded words.
+
+This is currently a package-private API, but will be considered for promotion
+to a public API if there is demand.
+
+"""
+
+# An ecoded word looks like this:
+#
+# =?charset[*lang]?cte?encoded_string?=
+#
+# for more information about charset see the charset module. Here it is one
+# of the preferred MIME charset names (hopefully; you never know when parsing).
+# cte (Content Transfer Encoding) is either 'q' or 'b' (ignoring case). In
+# theory other letters could be used for other encodings, but in practice this
+# (almost?) never happens. There could be a public API for adding entries
+# to the CTE tables, but YAGNI for now. 'q' is Quoted Printable, 'b' is
+# Base64. The meaning of encoded_string should be obvious. 'lang' is optional
+# as indicated by the brackets (they are not part of the syntax) but is almost
+# never encountered in practice.
+#
+# The general interface for a CTE decoder is that it takes the encoded_string
+# as its argument, and returns a tuple (cte_decoded_string, defects). The
+# cte_decoded_string is the original binary that was encoded using the
+# specified cte. 'defects' is a list of MessageDefect instances indicating any
+# problems encountered during conversion. 'charset' and 'lang' are the
+# corresponding strings extracted from the EW, case preserved.
+#
+# The general interface for a CTE encoder is that it takes a binary sequence
+# as input and returns the cte_encoded_string, which is an ascii-only string.
+#
+# Each decoder must also supply a length function that takes the binary
+# sequence as its argument and returns the length of the resulting encoded
+# string.
+#
+# The main API functions for the module are decode, which calls the decoder
+# referenced by the cte specifier, and encode, which adds the appropriate
+# RFC 2047 "chrome" to the encoded string, and can optionally automatically
+# select the shortest possible encoding. See their docstrings below for
+# details.
+
+import re
+import base64
+import binascii
+import functools
+from string import ascii_letters, digits
+from email import errors
+
+__all__ = ['decode_q',
+ 'encode_q',
+ 'decode_b',
+ 'encode_b',
+ 'len_q',
+ 'len_b',
+ 'decode',
+ 'encode',
+ ]
+
+#
+# Quoted Printable
+#
+
+# regex based decoder.
+_q_byte_subber = functools.partial(re.compile(br'=([a-fA-F0-9]{2})').sub,
+ lambda m: bytes.fromhex(m.group(1).decode()))
+
+def decode_q(encoded):
+ encoded = encoded.replace(b'_', b' ')
+ return _q_byte_subber(encoded), []
+
+
+# dict mapping bytes to their encoded form
+class _QByteMap(dict):
+
+ safe = b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii')
+
+ def __missing__(self, key):
+ if key in self.safe:
+ self[key] = chr(key)
+ else:
+ self[key] = "={:02X}".format(key)
+ return self[key]
+
+_q_byte_map = _QByteMap()
+
+# In headers spaces are mapped to '_'.
+_q_byte_map[ord(' ')] = '_'
+
+def encode_q(bstring):
+ return ''.join(_q_byte_map[x] for x in bstring)
+
+def len_q(bstring):
+ return sum(len(_q_byte_map[x]) for x in bstring)
+
+
+#
+# Base64
+#
+
+def decode_b(encoded):
+ # First try encoding with validate=True, fixing the padding if needed.
+ # This will succeed only if encoded includes no invalid characters.
+ pad_err = len(encoded) % 4
+ missing_padding = b'==='[:4-pad_err] if pad_err else b''
+ try:
+ return (
+ base64.b64decode(encoded + missing_padding, validate=True),
+ [errors.InvalidBase64PaddingDefect()] if pad_err else [],
+ )
+ except binascii.Error:
+ # Since we had correct padding, this is likely an invalid char error.
+ #
+ # The non-alphabet characters are ignored as far as padding
+ # goes, but we don't know how many there are. So try without adding
+ # padding to see if it works.
+ try:
+ return (
+ base64.b64decode(encoded, validate=False),
+ [errors.InvalidBase64CharactersDefect()],
+ )
+ except binascii.Error:
+ # Add as much padding as could possibly be necessary (extra padding
+ # is ignored).
+ try:
+ return (
+ base64.b64decode(encoded + b'==', validate=False),
+ [errors.InvalidBase64CharactersDefect(),
+ errors.InvalidBase64PaddingDefect()],
+ )
+ except binascii.Error:
+ # This only happens when the encoded string's length is 1 more
+ # than a multiple of 4, which is invalid.
+ #
+ # bpo-27397: Just return the encoded string since there's no
+ # way to decode.
+ return encoded, [errors.InvalidBase64LengthDefect()]
+
+def encode_b(bstring):
+ return base64.b64encode(bstring).decode('ascii')
+
+def len_b(bstring):
+ groups_of_3, leftover = divmod(len(bstring), 3)
+ # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
+ return groups_of_3 * 4 + (4 if leftover else 0)
+
+
+_cte_decoders = {
+ 'q': decode_q,
+ 'b': decode_b,
+ }
+
+def decode(ew):
+ """Decode encoded word and return (string, charset, lang, defects) tuple.
+
+ An RFC 2047/2243 encoded word has the form:
+
+ =?charset*lang?cte?encoded_string?=
+
+ where '*lang' may be omitted but the other parts may not be.
+
+ This function expects exactly such a string (that is, it does not check the
+ syntax and may raise errors if the string is not well formed), and returns
+ the encoded_string decoded first from its Content Transfer Encoding and
+ then from the resulting bytes into unicode using the specified charset. If
+ the cte-decoded string does not successfully decode using the specified
+ character set, a defect is added to the defects list and the unknown octets
+ are replaced by the unicode 'unknown' character \\uFDFF.
+
+ The specified charset and language are returned. The default for language,
+ which is rarely if ever encountered, is the empty string.
+
+ """
+ _, charset, cte, cte_string, _ = ew.split('?')
+ charset, _, lang = charset.partition('*')
+ cte = cte.lower()
+ # Recover the original bytes and do CTE decoding.
+ bstring = cte_string.encode('ascii', 'surrogateescape')
+ bstring, defects = _cte_decoders[cte](bstring)
+ # Turn the CTE decoded bytes into unicode.
+ try:
+ string = bstring.decode(charset)
+ except UnicodeError:
+ defects.append(errors.UndecodableBytesDefect("Encoded word "
+ "contains bytes not decodable using {} charset".format(charset)))
+ string = bstring.decode(charset, 'surrogateescape')
+ except LookupError:
+ string = bstring.decode('ascii', 'surrogateescape')
+ if charset.lower() != 'unknown-8bit':
+ defects.append(errors.CharsetError("Unknown charset {} "
+ "in encoded word; decoded as unknown bytes".format(charset)))
+ return string, charset, lang, defects
+
+
+_cte_encoders = {
+ 'q': encode_q,
+ 'b': encode_b,
+ }
+
+_cte_encode_length = {
+ 'q': len_q,
+ 'b': len_b,
+ }
+
+def encode(string, charset='utf-8', encoding=None, lang=''):
+ """Encode string using the CTE encoding that produces the shorter result.
+
+ Produces an RFC 2047/2243 encoded word of the form:
+
+ =?charset*lang?cte?encoded_string?=
+
+ where '*lang' is omitted unless the 'lang' parameter is given a value.
+ Optional argument charset (defaults to utf-8) specifies the charset to use
+ to encode the string to binary before CTE encoding it. Optional argument
+ 'encoding' is the cte specifier for the encoding that should be used ('q'
+ or 'b'); if it is None (the default) the encoding which produces the
+ shortest encoded sequence is used, except that 'q' is preferred if it is up
+ to five characters longer. Optional argument 'lang' (default '') gives the
+ RFC 2243 language string to specify in the encoded word.
+
+ """
+ if charset == 'unknown-8bit':
+ bstring = string.encode('ascii', 'surrogateescape')
+ else:
+ bstring = string.encode(charset)
+ if encoding is None:
+ qlen = _cte_encode_length['q'](bstring)
+ blen = _cte_encode_length['b'](bstring)
+ # Bias toward q. 5 is arbitrary.
+ encoding = 'q' if qlen - blen < 5 else 'b'
+ encoded = _cte_encoders[encoding](bstring)
+ if lang:
+ lang = '*' + lang
+ return "=?{}{}?{}?{}?=".format(charset, lang, encoding, encoded)
diff --git a/dist/lib/email/_header_value_parser.py b/dist/lib/email/_header_value_parser.py
new file mode 100644
index 0000000..51d355f
--- /dev/null
+++ b/dist/lib/email/_header_value_parser.py
@@ -0,0 +1,3003 @@
+"""Header value parser implementing various email-related RFC parsing rules.
+
+The parsing methods defined in this module implement various email related
+parsing rules. Principal among them is RFC 5322, which is the followon
+to RFC 2822 and primarily a clarification of the former. It also implements
+RFC 2047 encoded word decoding.
+
+RFC 5322 goes to considerable trouble to maintain backward compatibility with
+RFC 822 in the parse phase, while cleaning up the structure on the generation
+phase. This parser supports correct RFC 5322 generation by tagging white space
+as folding white space only when folding is allowed in the non-obsolete rule
+sets. Actually, the parser is even more generous when accepting input than RFC
+5322 mandates, following the spirit of Postel's Law, which RFC 5322 encourages.
+Where possible deviations from the standard are annotated on the 'defects'
+attribute of tokens that deviate.
+
+The general structure of the parser follows RFC 5322, and uses its terminology
+where there is a direct correspondence. Where the implementation requires a
+somewhat different structure than that used by the formal grammar, new terms
+that mimic the closest existing terms are used. Thus, it really helps to have
+a copy of RFC 5322 handy when studying this code.
+
+Input to the parser is a string that has already been unfolded according to
+RFC 5322 rules. According to the RFC this unfolding is the very first step, and
+this parser leaves the unfolding step to a higher level message parser, which
+will have already detected the line breaks that need unfolding while
+determining the beginning and end of each header.
+
+The output of the parser is a TokenList object, which is a list subclass. A
+TokenList is a recursive data structure. The terminal nodes of the structure
+are Terminal objects, which are subclasses of str. These do not correspond
+directly to terminal objects in the formal grammar, but are instead more
+practical higher level combinations of true terminals.
+
+All TokenList and Terminal objects have a 'value' attribute, which produces the
+semantically meaningful value of that part of the parse subtree. The value of
+all whitespace tokens (no matter how many sub-tokens they may contain) is a
+single space, as per the RFC rules. This includes 'CFWS', which is herein
+included in the general class of whitespace tokens. There is one exception to
+the rule that whitespace tokens are collapsed into single spaces in values: in
+the value of a 'bare-quoted-string' (a quoted-string with no leading or
+trailing whitespace), any whitespace that appeared between the quotation marks
+is preserved in the returned value. Note that in all Terminal strings quoted
+pairs are turned into their unquoted values.
+
+All TokenList and Terminal objects also have a string value, which attempts to
+be a "canonical" representation of the RFC-compliant form of the substring that
+produced the parsed subtree, including minimal use of quoted pair quoting.
+Whitespace runs are not collapsed.
+
+Comment tokens also have a 'content' attribute providing the string found
+between the parens (including any nested comments) with whitespace preserved.
+
+All TokenList and Terminal objects have a 'defects' attribute which is a
+possibly empty list all of the defects found while creating the token. Defects
+may appear on any token in the tree, and a composite list of all defects in the
+subtree is available through the 'all_defects' attribute of any node. (For
+Terminal notes x.defects == x.all_defects.)
+
+Each object in a parse tree is called a 'token', and each has a 'token_type'
+attribute that gives the name from the RFC 5322 grammar that it represents.
+Not all RFC 5322 nodes are produced, and there is one non-RFC 5322 node that
+may be produced: 'ptext'. A 'ptext' is a string of printable ascii characters.
+It is returned in place of lists of (ctext/quoted-pair) and
+(qtext/quoted-pair).
+
+XXX: provide complete list of token types.
+"""
+
+import re
+import sys
+import urllib # For urllib.parse.unquote
+from string import hexdigits
+from operator import itemgetter
+from email import _encoded_words as _ew
+from email import errors
+from email import utils
+
+#
+# Useful constants and functions
+#
+
+WSP = set(' \t')
+CFWS_LEADER = WSP | set('(')
+SPECIALS = set(r'()<>@,:;.\"[]')
+ATOM_ENDS = SPECIALS | WSP
+DOT_ATOM_ENDS = ATOM_ENDS - set('.')
+# '.', '"', and '(' do not end phrases in order to support obs-phrase
+PHRASE_ENDS = SPECIALS - set('."(')
+TSPECIALS = (SPECIALS | set('/?=')) - set('.')
+TOKEN_ENDS = TSPECIALS | WSP
+ASPECIALS = TSPECIALS | set("*'%")
+ATTRIBUTE_ENDS = ASPECIALS | WSP
+EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%')
+
+def quote_string(value):
+ return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
+
+# Match a RFC 2047 word, looks like =?utf-8?q?someword?=
+rfc2047_matcher = re.compile(r'''
+ =\? # literal =?
+ [^?]* # charset
+ \? # literal ?
+ [qQbB] # literal 'q' or 'b', case insensitive
+ \? # literal ?
+ .*? # encoded word
+ \?= # literal ?=
+''', re.VERBOSE | re.MULTILINE)
+
+
+#
+# TokenList and its subclasses
+#
+
+class TokenList(list):
+
+ token_type = None
+ syntactic_break = True
+ ew_combine_allowed = True
+
+ def __init__(self, *args, **kw):
+ super().__init__(*args, **kw)
+ self.defects = []
+
+ def __str__(self):
+ return ''.join(str(x) for x in self)
+
+ def __repr__(self):
+ return '{}({})'.format(self.__class__.__name__,
+ super().__repr__())
+
+ @property
+ def value(self):
+ return ''.join(x.value for x in self if x.value)
+
+ @property
+ def all_defects(self):
+ return sum((x.all_defects for x in self), self.defects)
+
+ def startswith_fws(self):
+ return self[0].startswith_fws()
+
+ @property
+ def as_ew_allowed(self):
+ """True if all top level tokens of this part may be RFC2047 encoded."""
+ return all(part.as_ew_allowed for part in self)
+
+ @property
+ def comments(self):
+ comments = []
+ for token in self:
+ comments.extend(token.comments)
+ return comments
+
+ def fold(self, *, policy):
+ return _refold_parse_tree(self, policy=policy)
+
+ def pprint(self, indent=''):
+ print(self.ppstr(indent=indent))
+
+ def ppstr(self, indent=''):
+ return '\n'.join(self._pp(indent=indent))
+
+ def _pp(self, indent=''):
+ yield '{}{}/{}('.format(
+ indent,
+ self.__class__.__name__,
+ self.token_type)
+ for token in self:
+ if not hasattr(token, '_pp'):
+ yield (indent + ' !! invalid element in token '
+ 'list: {!r}'.format(token))
+ else:
+ yield from token._pp(indent+' ')
+ if self.defects:
+ extra = ' Defects: {}'.format(self.defects)
+ else:
+ extra = ''
+ yield '{}){}'.format(indent, extra)
+
+
+class WhiteSpaceTokenList(TokenList):
+
+ @property
+ def value(self):
+ return ' '
+
+ @property
+ def comments(self):
+ return [x.content for x in self if x.token_type=='comment']
+
+
+class UnstructuredTokenList(TokenList):
+ token_type = 'unstructured'
+
+
+class Phrase(TokenList):
+ token_type = 'phrase'
+
+class Word(TokenList):
+ token_type = 'word'
+
+
+class CFWSList(WhiteSpaceTokenList):
+ token_type = 'cfws'
+
+
+class Atom(TokenList):
+ token_type = 'atom'
+
+
+class Token(TokenList):
+ token_type = 'token'
+ encode_as_ew = False
+
+
+class EncodedWord(TokenList):
+ token_type = 'encoded-word'
+ cte = None
+ charset = None
+ lang = None
+
+
+class QuotedString(TokenList):
+
+ token_type = 'quoted-string'
+
+ @property
+ def content(self):
+ for x in self:
+ if x.token_type == 'bare-quoted-string':
+ return x.value
+
+ @property
+ def quoted_value(self):
+ res = []
+ for x in self:
+ if x.token_type == 'bare-quoted-string':
+ res.append(str(x))
+ else:
+ res.append(x.value)
+ return ''.join(res)
+
+ @property
+ def stripped_value(self):
+ for token in self:
+ if token.token_type == 'bare-quoted-string':
+ return token.value
+
+
+class BareQuotedString(QuotedString):
+
+ token_type = 'bare-quoted-string'
+
+ def __str__(self):
+ return quote_string(''.join(str(x) for x in self))
+
+ @property
+ def value(self):
+ return ''.join(str(x) for x in self)
+
+
+class Comment(WhiteSpaceTokenList):
+
+ token_type = 'comment'
+
+ def __str__(self):
+ return ''.join(sum([
+ ["("],
+ [self.quote(x) for x in self],
+ [")"],
+ ], []))
+
+ def quote(self, value):
+ if value.token_type == 'comment':
+ return str(value)
+ return str(value).replace('\\', '\\\\').replace(
+ '(', r'\(').replace(
+ ')', r'\)')
+
+ @property
+ def content(self):
+ return ''.join(str(x) for x in self)
+
+ @property
+ def comments(self):
+ return [self.content]
+
+class AddressList(TokenList):
+
+ token_type = 'address-list'
+
+ @property
+ def addresses(self):
+ return [x for x in self if x.token_type=='address']
+
+ @property
+ def mailboxes(self):
+ return sum((x.mailboxes
+ for x in self if x.token_type=='address'), [])
+
+ @property
+ def all_mailboxes(self):
+ return sum((x.all_mailboxes
+ for x in self if x.token_type=='address'), [])
+
+
+class Address(TokenList):
+
+ token_type = 'address'
+
+ @property
+ def display_name(self):
+ if self[0].token_type == 'group':
+ return self[0].display_name
+
+ @property
+ def mailboxes(self):
+ if self[0].token_type == 'mailbox':
+ return [self[0]]
+ elif self[0].token_type == 'invalid-mailbox':
+ return []
+ return self[0].mailboxes
+
+ @property
+ def all_mailboxes(self):
+ if self[0].token_type == 'mailbox':
+ return [self[0]]
+ elif self[0].token_type == 'invalid-mailbox':
+ return [self[0]]
+ return self[0].all_mailboxes
+
+class MailboxList(TokenList):
+
+ token_type = 'mailbox-list'
+
+ @property
+ def mailboxes(self):
+ return [x for x in self if x.token_type=='mailbox']
+
+ @property
+ def all_mailboxes(self):
+ return [x for x in self
+ if x.token_type in ('mailbox', 'invalid-mailbox')]
+
+
+class GroupList(TokenList):
+
+ token_type = 'group-list'
+
+ @property
+ def mailboxes(self):
+ if not self or self[0].token_type != 'mailbox-list':
+ return []
+ return self[0].mailboxes
+
+ @property
+ def all_mailboxes(self):
+ if not self or self[0].token_type != 'mailbox-list':
+ return []
+ return self[0].all_mailboxes
+
+
+class Group(TokenList):
+
+ token_type = "group"
+
+ @property
+ def mailboxes(self):
+ if self[2].token_type != 'group-list':
+ return []
+ return self[2].mailboxes
+
+ @property
+ def all_mailboxes(self):
+ if self[2].token_type != 'group-list':
+ return []
+ return self[2].all_mailboxes
+
+ @property
+ def display_name(self):
+ return self[0].display_name
+
+
+class NameAddr(TokenList):
+
+ token_type = 'name-addr'
+
+ @property
+ def display_name(self):
+ if len(self) == 1:
+ return None
+ return self[0].display_name
+
+ @property
+ def local_part(self):
+ return self[-1].local_part
+
+ @property
+ def domain(self):
+ return self[-1].domain
+
+ @property
+ def route(self):
+ return self[-1].route
+
+ @property
+ def addr_spec(self):
+ return self[-1].addr_spec
+
+
+class AngleAddr(TokenList):
+
+ token_type = 'angle-addr'
+
+ @property
+ def local_part(self):
+ for x in self:
+ if x.token_type == 'addr-spec':
+ return x.local_part
+
+ @property
+ def domain(self):
+ for x in self:
+ if x.token_type == 'addr-spec':
+ return x.domain
+
+ @property
+ def route(self):
+ for x in self:
+ if x.token_type == 'obs-route':
+ return x.domains
+
+ @property
+ def addr_spec(self):
+ for x in self:
+ if x.token_type == 'addr-spec':
+ if x.local_part:
+ return x.addr_spec
+ else:
+ return quote_string(x.local_part) + x.addr_spec
+ else:
+ return '<>'
+
+
+class ObsRoute(TokenList):
+
+ token_type = 'obs-route'
+
+ @property
+ def domains(self):
+ return [x.domain for x in self if x.token_type == 'domain']
+
+
+class Mailbox(TokenList):
+
+ token_type = 'mailbox'
+
+ @property
+ def display_name(self):
+ if self[0].token_type == 'name-addr':
+ return self[0].display_name
+
+ @property
+ def local_part(self):
+ return self[0].local_part
+
+ @property
+ def domain(self):
+ return self[0].domain
+
+ @property
+ def route(self):
+ if self[0].token_type == 'name-addr':
+ return self[0].route
+
+ @property
+ def addr_spec(self):
+ return self[0].addr_spec
+
+
+class InvalidMailbox(TokenList):
+
+ token_type = 'invalid-mailbox'
+
+ @property
+ def display_name(self):
+ return None
+
+ local_part = domain = route = addr_spec = display_name
+
+
+class Domain(TokenList):
+
+ token_type = 'domain'
+ as_ew_allowed = False
+
+ @property
+ def domain(self):
+ return ''.join(super().value.split())
+
+
+class DotAtom(TokenList):
+ token_type = 'dot-atom'
+
+
+class DotAtomText(TokenList):
+ token_type = 'dot-atom-text'
+ as_ew_allowed = True
+
+
+class NoFoldLiteral(TokenList):
+ token_type = 'no-fold-literal'
+ as_ew_allowed = False
+
+
+class AddrSpec(TokenList):
+
+ token_type = 'addr-spec'
+ as_ew_allowed = False
+
+ @property
+ def local_part(self):
+ return self[0].local_part
+
+ @property
+ def domain(self):
+ if len(self) < 3:
+ return None
+ return self[-1].domain
+
+ @property
+ def value(self):
+ if len(self) < 3:
+ return self[0].value
+ return self[0].value.rstrip()+self[1].value+self[2].value.lstrip()
+
+ @property
+ def addr_spec(self):
+ nameset = set(self.local_part)
+ if len(nameset) > len(nameset-DOT_ATOM_ENDS):
+ lp = quote_string(self.local_part)
+ else:
+ lp = self.local_part
+ if self.domain is not None:
+ return lp + '@' + self.domain
+ return lp
+
+
+class ObsLocalPart(TokenList):
+
+ token_type = 'obs-local-part'
+ as_ew_allowed = False
+
+
+class DisplayName(Phrase):
+
+ token_type = 'display-name'
+ ew_combine_allowed = False
+
+ @property
+ def display_name(self):
+ res = TokenList(self)
+ if len(res) == 0:
+ return res.value
+ if res[0].token_type == 'cfws':
+ res.pop(0)
+ else:
+ if res[0][0].token_type == 'cfws':
+ res[0] = TokenList(res[0][1:])
+ if res[-1].token_type == 'cfws':
+ res.pop()
+ else:
+ if res[-1][-1].token_type == 'cfws':
+ res[-1] = TokenList(res[-1][:-1])
+ return res.value
+
+ @property
+ def value(self):
+ quote = False
+ if self.defects:
+ quote = True
+ else:
+ for x in self:
+ if x.token_type == 'quoted-string':
+ quote = True
+ if len(self) != 0 and quote:
+ pre = post = ''
+ if self[0].token_type=='cfws' or self[0][0].token_type=='cfws':
+ pre = ' '
+ if self[-1].token_type=='cfws' or self[-1][-1].token_type=='cfws':
+ post = ' '
+ return pre+quote_string(self.display_name)+post
+ else:
+ return super().value
+
+
+class LocalPart(TokenList):
+
+ token_type = 'local-part'
+ as_ew_allowed = False
+
+ @property
+ def value(self):
+ if self[0].token_type == "quoted-string":
+ return self[0].quoted_value
+ else:
+ return self[0].value
+
+ @property
+ def local_part(self):
+ # Strip whitespace from front, back, and around dots.
+ res = [DOT]
+ last = DOT
+ last_is_tl = False
+ for tok in self[0] + [DOT]:
+ if tok.token_type == 'cfws':
+ continue
+ if (last_is_tl and tok.token_type == 'dot' and
+ last[-1].token_type == 'cfws'):
+ res[-1] = TokenList(last[:-1])
+ is_tl = isinstance(tok, TokenList)
+ if (is_tl and last.token_type == 'dot' and
+ tok[0].token_type == 'cfws'):
+ res.append(TokenList(tok[1:]))
+ else:
+ res.append(tok)
+ last = res[-1]
+ last_is_tl = is_tl
+ res = TokenList(res[1:-1])
+ return res.value
+
+
+class DomainLiteral(TokenList):
+
+ token_type = 'domain-literal'
+ as_ew_allowed = False
+
+ @property
+ def domain(self):
+ return ''.join(super().value.split())
+
+ @property
+ def ip(self):
+ for x in self:
+ if x.token_type == 'ptext':
+ return x.value
+
+
+class MIMEVersion(TokenList):
+
+ token_type = 'mime-version'
+ major = None
+ minor = None
+
+
+class Parameter(TokenList):
+
+ token_type = 'parameter'
+ sectioned = False
+ extended = False
+ charset = 'us-ascii'
+
+ @property
+ def section_number(self):
+ # Because the first token, the attribute (name) eats CFWS, the second
+ # token is always the section if there is one.
+ return self[1].number if self.sectioned else 0
+
+ @property
+ def param_value(self):
+ # This is part of the "handle quoted extended parameters" hack.
+ for token in self:
+ if token.token_type == 'value':
+ return token.stripped_value
+ if token.token_type == 'quoted-string':
+ for token in token:
+ if token.token_type == 'bare-quoted-string':
+ for token in token:
+ if token.token_type == 'value':
+ return token.stripped_value
+ return ''
+
+
+class InvalidParameter(Parameter):
+
+ token_type = 'invalid-parameter'
+
+
+class Attribute(TokenList):
+
+ token_type = 'attribute'
+
+ @property
+ def stripped_value(self):
+ for token in self:
+ if token.token_type.endswith('attrtext'):
+ return token.value
+
+class Section(TokenList):
+
+ token_type = 'section'
+ number = None
+
+
+class Value(TokenList):
+
+ token_type = 'value'
+
+ @property
+ def stripped_value(self):
+ token = self[0]
+ if token.token_type == 'cfws':
+ token = self[1]
+ if token.token_type.endswith(
+ ('quoted-string', 'attribute', 'extended-attribute')):
+ return token.stripped_value
+ return self.value
+
+
+class MimeParameters(TokenList):
+
+ token_type = 'mime-parameters'
+ syntactic_break = False
+
+ @property
+ def params(self):
+ # The RFC specifically states that the ordering of parameters is not
+ # guaranteed and may be reordered by the transport layer. So we have
+ # to assume the RFC 2231 pieces can come in any order. However, we
+ # output them in the order that we first see a given name, which gives
+ # us a stable __str__.
+ params = {} # Using order preserving dict from Python 3.7+
+ for token in self:
+ if not token.token_type.endswith('parameter'):
+ continue
+ if token[0].token_type != 'attribute':
+ continue
+ name = token[0].value.strip()
+ if name not in params:
+ params[name] = []
+ params[name].append((token.section_number, token))
+ for name, parts in params.items():
+ parts = sorted(parts, key=itemgetter(0))
+ first_param = parts[0][1]
+ charset = first_param.charset
+ # Our arbitrary error recovery is to ignore duplicate parameters,
+ # to use appearance order if there are duplicate rfc 2231 parts,
+ # and to ignore gaps. This mimics the error recovery of get_param.
+ if not first_param.extended and len(parts) > 1:
+ if parts[1][0] == 0:
+ parts[1][1].defects.append(errors.InvalidHeaderDefect(
+ 'duplicate parameter name; duplicate(s) ignored'))
+ parts = parts[:1]
+ # Else assume the *0* was missing...note that this is different
+ # from get_param, but we registered a defect for this earlier.
+ value_parts = []
+ i = 0
+ for section_number, param in parts:
+ if section_number != i:
+ # We could get fancier here and look for a complete
+ # duplicate extended parameter and ignore the second one
+ # seen. But we're not doing that. The old code didn't.
+ if not param.extended:
+ param.defects.append(errors.InvalidHeaderDefect(
+ 'duplicate parameter name; duplicate ignored'))
+ continue
+ else:
+ param.defects.append(errors.InvalidHeaderDefect(
+ "inconsistent RFC2231 parameter numbering"))
+ i += 1
+ value = param.param_value
+ if param.extended:
+ try:
+ value = urllib.parse.unquote_to_bytes(value)
+ except UnicodeEncodeError:
+ # source had surrogate escaped bytes. What we do now
+ # is a bit of an open question. I'm not sure this is
+ # the best choice, but it is what the old algorithm did
+ value = urllib.parse.unquote(value, encoding='latin-1')
+ else:
+ try:
+ value = value.decode(charset, 'surrogateescape')
+ except LookupError:
+ # XXX: there should really be a custom defect for
+ # unknown character set to make it easy to find,
+ # because otherwise unknown charset is a silent
+ # failure.
+ value = value.decode('us-ascii', 'surrogateescape')
+ if utils._has_surrogates(value):
+ param.defects.append(errors.UndecodableBytesDefect())
+ value_parts.append(value)
+ value = ''.join(value_parts)
+ yield name, value
+
+ def __str__(self):
+ params = []
+ for name, value in self.params:
+ if value:
+ params.append('{}={}'.format(name, quote_string(value)))
+ else:
+ params.append(name)
+ params = '; '.join(params)
+ return ' ' + params if params else ''
+
+
+class ParameterizedHeaderValue(TokenList):
+
+ # Set this false so that the value doesn't wind up on a new line even
+ # if it and the parameters would fit there but not on the first line.
+ syntactic_break = False
+
+ @property
+ def params(self):
+ for token in reversed(self):
+ if token.token_type == 'mime-parameters':
+ return token.params
+ return {}
+
+
+class ContentType(ParameterizedHeaderValue):
+ token_type = 'content-type'
+ as_ew_allowed = False
+ maintype = 'text'
+ subtype = 'plain'
+
+
+class ContentDisposition(ParameterizedHeaderValue):
+ token_type = 'content-disposition'
+ as_ew_allowed = False
+ content_disposition = None
+
+
+class ContentTransferEncoding(TokenList):
+ token_type = 'content-transfer-encoding'
+ as_ew_allowed = False
+ cte = '7bit'
+
+
+class HeaderLabel(TokenList):
+ token_type = 'header-label'
+ as_ew_allowed = False
+
+
+class MsgID(TokenList):
+ token_type = 'msg-id'
+ as_ew_allowed = False
+
+ def fold(self, policy):
+ # message-id tokens may not be folded.
+ return str(self) + policy.linesep
+
+
+class MessageID(MsgID):
+ token_type = 'message-id'
+
+
+class InvalidMessageID(MessageID):
+ token_type = 'invalid-message-id'
+
+
+class Header(TokenList):
+ token_type = 'header'
+
+
+#
+# Terminal classes and instances
+#
+
+class Terminal(str):
+
+ as_ew_allowed = True
+ ew_combine_allowed = True
+ syntactic_break = True
+
+ def __new__(cls, value, token_type):
+ self = super().__new__(cls, value)
+ self.token_type = token_type
+ self.defects = []
+ return self
+
+ def __repr__(self):
+ return "{}({})".format(self.__class__.__name__, super().__repr__())
+
+ def pprint(self):
+ print(self.__class__.__name__ + '/' + self.token_type)
+
+ @property
+ def all_defects(self):
+ return list(self.defects)
+
+ def _pp(self, indent=''):
+ return ["{}{}/{}({}){}".format(
+ indent,
+ self.__class__.__name__,
+ self.token_type,
+ super().__repr__(),
+ '' if not self.defects else ' {}'.format(self.defects),
+ )]
+
+ def pop_trailing_ws(self):
+ # This terminates the recursion.
+ return None
+
+ @property
+ def comments(self):
+ return []
+
+ def __getnewargs__(self):
+ return(str(self), self.token_type)
+
+
+class WhiteSpaceTerminal(Terminal):
+
+ @property
+ def value(self):
+ return ' '
+
+ def startswith_fws(self):
+ return True
+
+
+class ValueTerminal(Terminal):
+
+ @property
+ def value(self):
+ return self
+
+ def startswith_fws(self):
+ return False
+
+
+class EWWhiteSpaceTerminal(WhiteSpaceTerminal):
+
+ @property
+ def value(self):
+ return ''
+
+ def __str__(self):
+ return ''
+
+
+class _InvalidEwError(errors.HeaderParseError):
+ """Invalid encoded word found while parsing headers."""
+
+
+# XXX these need to become classes and used as instances so
+# that a program can't change them in a parse tree and screw
+# up other parse trees. Maybe should have tests for that, too.
+DOT = ValueTerminal('.', 'dot')
+ListSeparator = ValueTerminal(',', 'list-separator')
+RouteComponentMarker = ValueTerminal('@', 'route-component-marker')
+
+#
+# Parser
+#
+
+# Parse strings according to RFC822/2047/2822/5322 rules.
+#
+# This is a stateless parser. Each get_XXX function accepts a string and
+# returns either a Terminal or a TokenList representing the RFC object named
+# by the method and a string containing the remaining unparsed characters
+# from the input. Thus a parser method consumes the next syntactic construct
+# of a given type and returns a token representing the construct plus the
+# unparsed remainder of the input string.
+#
+# For example, if the first element of a structured header is a 'phrase',
+# then:
+#
+# phrase, value = get_phrase(value)
+#
+# returns the complete phrase from the start of the string value, plus any
+# characters left in the string after the phrase is removed.
+
+_wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split
+_non_atom_end_matcher = re.compile(r"[^{}]+".format(
+ re.escape(''.join(ATOM_ENDS)))).match
+_non_printable_finder = re.compile(r"[\x00-\x20\x7F]").findall
+_non_token_end_matcher = re.compile(r"[^{}]+".format(
+ re.escape(''.join(TOKEN_ENDS)))).match
+_non_attribute_end_matcher = re.compile(r"[^{}]+".format(
+ re.escape(''.join(ATTRIBUTE_ENDS)))).match
+_non_extended_attribute_end_matcher = re.compile(r"[^{}]+".format(
+ re.escape(''.join(EXTENDED_ATTRIBUTE_ENDS)))).match
+
+def _validate_xtext(xtext):
+ """If input token contains ASCII non-printables, register a defect."""
+
+ non_printables = _non_printable_finder(xtext)
+ if non_printables:
+ xtext.defects.append(errors.NonPrintableDefect(non_printables))
+ if utils._has_surrogates(xtext):
+ xtext.defects.append(errors.UndecodableBytesDefect(
+ "Non-ASCII characters found in header token"))
+
+def _get_ptext_to_endchars(value, endchars):
+ """Scan printables/quoted-pairs until endchars and return unquoted ptext.
+
+ This function turns a run of qcontent, ccontent-without-comments, or
+ dtext-with-quoted-printables into a single string by unquoting any
+ quoted printables. It returns the string, the remaining value, and
+ a flag that is True iff there were any quoted printables decoded.
+
+ """
+ fragment, *remainder = _wsp_splitter(value, 1)
+ vchars = []
+ escape = False
+ had_qp = False
+ for pos in range(len(fragment)):
+ if fragment[pos] == '\\':
+ if escape:
+ escape = False
+ had_qp = True
+ else:
+ escape = True
+ continue
+ if escape:
+ escape = False
+ elif fragment[pos] in endchars:
+ break
+ vchars.append(fragment[pos])
+ else:
+ pos = pos + 1
+ return ''.join(vchars), ''.join([fragment[pos:]] + remainder), had_qp
+
+def get_fws(value):
+ """FWS = 1*WSP
+
+ This isn't the RFC definition. We're using fws to represent tokens where
+ folding can be done, but when we are parsing the *un*folding has already
+ been done so we don't need to watch out for CRLF.
+
+ """
+ newvalue = value.lstrip()
+ fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
+ return fws, newvalue
+
+def get_encoded_word(value):
+ """ encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
+
+ """
+ ew = EncodedWord()
+ if not value.startswith('=?'):
+ raise errors.HeaderParseError(
+ "expected encoded word but found {}".format(value))
+ tok, *remainder = value[2:].split('?=', 1)
+ if tok == value[2:]:
+ raise errors.HeaderParseError(
+ "expected encoded word but found {}".format(value))
+ remstr = ''.join(remainder)
+ if (len(remstr) > 1 and
+ remstr[0] in hexdigits and
+ remstr[1] in hexdigits and
+ tok.count('?') < 2):
+ # The ? after the CTE was followed by an encoded word escape (=XX).
+ rest, *remainder = remstr.split('?=', 1)
+ tok = tok + '?=' + rest
+ if len(tok.split()) > 1:
+ ew.defects.append(errors.InvalidHeaderDefect(
+ "whitespace inside encoded word"))
+ ew.cte = value
+ value = ''.join(remainder)
+ try:
+ text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
+ except (ValueError, KeyError):
+ raise _InvalidEwError(
+ "encoded word format invalid: '{}'".format(ew.cte))
+ ew.charset = charset
+ ew.lang = lang
+ ew.defects.extend(defects)
+ while text:
+ if text[0] in WSP:
+ token, text = get_fws(text)
+ ew.append(token)
+ continue
+ chars, *remainder = _wsp_splitter(text, 1)
+ vtext = ValueTerminal(chars, 'vtext')
+ _validate_xtext(vtext)
+ ew.append(vtext)
+ text = ''.join(remainder)
+ # Encoded words should be followed by a WS
+ if value and value[0] not in WSP:
+ ew.defects.append(errors.InvalidHeaderDefect(
+ "missing trailing whitespace after encoded-word"))
+ return ew, value
+
+def get_unstructured(value):
+ """unstructured = (*([FWS] vchar) *WSP) / obs-unstruct
+ obs-unstruct = *((*LF *CR *(obs-utext) *LF *CR)) / FWS)
+ obs-utext = %d0 / obs-NO-WS-CTL / LF / CR
+
+ obs-NO-WS-CTL is control characters except WSP/CR/LF.
+
+ So, basically, we have printable runs, plus control characters or nulls in
+ the obsolete syntax, separated by whitespace. Since RFC 2047 uses the
+ obsolete syntax in its specification, but requires whitespace on either
+ side of the encoded words, I can see no reason to need to separate the
+ non-printable-non-whitespace from the printable runs if they occur, so we
+ parse this into xtext tokens separated by WSP tokens.
+
+ Because an 'unstructured' value must by definition constitute the entire
+ value, this 'get' routine does not return a remaining value, only the
+ parsed TokenList.
+
+ """
+ # XXX: but what about bare CR and LF? They might signal the start or
+ # end of an encoded word. YAGNI for now, since our current parsers
+ # will never send us strings with bare CR or LF.
+
+ unstructured = UnstructuredTokenList()
+ while value:
+ if value[0] in WSP:
+ token, value = get_fws(value)
+ unstructured.append(token)
+ continue
+ valid_ew = True
+ if value.startswith('=?'):
+ try:
+ token, value = get_encoded_word(value)
+ except _InvalidEwError:
+ valid_ew = False
+ except errors.HeaderParseError:
+ # XXX: Need to figure out how to register defects when
+ # appropriate here.
+ pass
+ else:
+ have_ws = True
+ if len(unstructured) > 0:
+ if unstructured[-1].token_type != 'fws':
+ unstructured.defects.append(errors.InvalidHeaderDefect(
+ "missing whitespace before encoded word"))
+ have_ws = False
+ if have_ws and len(unstructured) > 1:
+ if unstructured[-2].token_type == 'encoded-word':
+ unstructured[-1] = EWWhiteSpaceTerminal(
+ unstructured[-1], 'fws')
+ unstructured.append(token)
+ continue
+ tok, *remainder = _wsp_splitter(value, 1)
+ # Split in the middle of an atom if there is a rfc2047 encoded word
+ # which does not have WSP on both sides. The defect will be registered
+ # the next time through the loop.
+ # This needs to only be performed when the encoded word is valid;
+ # otherwise, performing it on an invalid encoded word can cause
+ # the parser to go in an infinite loop.
+ if valid_ew and rfc2047_matcher.search(tok):
+ tok, *remainder = value.partition('=?')
+ vtext = ValueTerminal(tok, 'vtext')
+ _validate_xtext(vtext)
+ unstructured.append(vtext)
+ value = ''.join(remainder)
+ return unstructured
+
+def get_qp_ctext(value):
+ r"""ctext =
+
+ This is not the RFC ctext, since we are handling nested comments in comment
+ and unquoting quoted-pairs here. We allow anything except the '()'
+ characters, but if we find any ASCII other than the RFC defined printable
+ ASCII, a NonPrintableDefect is added to the token's defects list. Since
+ quoted pairs are converted to their unquoted values, what is returned is
+ a 'ptext' token. In this case it is a WhiteSpaceTerminal, so it's value
+ is ' '.
+
+ """
+ ptext, value, _ = _get_ptext_to_endchars(value, '()')
+ ptext = WhiteSpaceTerminal(ptext, 'ptext')
+ _validate_xtext(ptext)
+ return ptext, value
+
+def get_qcontent(value):
+ """qcontent = qtext / quoted-pair
+
+ We allow anything except the DQUOTE character, but if we find any ASCII
+ other than the RFC defined printable ASCII, a NonPrintableDefect is
+ added to the token's defects list. Any quoted pairs are converted to their
+ unquoted values, so what is returned is a 'ptext' token. In this case it
+ is a ValueTerminal.
+
+ """
+ ptext, value, _ = _get_ptext_to_endchars(value, '"')
+ ptext = ValueTerminal(ptext, 'ptext')
+ _validate_xtext(ptext)
+ return ptext, value
+
+def get_atext(value):
+ """atext =
+
+ We allow any non-ATOM_ENDS in atext, but add an InvalidATextDefect to
+ the token's defects list if we find non-atext characters.
+ """
+ m = _non_atom_end_matcher(value)
+ if not m:
+ raise errors.HeaderParseError(
+ "expected atext but found '{}'".format(value))
+ atext = m.group()
+ value = value[len(atext):]
+ atext = ValueTerminal(atext, 'atext')
+ _validate_xtext(atext)
+ return atext, value
+
+def get_bare_quoted_string(value):
+ """bare-quoted-string = DQUOTE *([FWS] qcontent) [FWS] DQUOTE
+
+ A quoted-string without the leading or trailing white space. Its
+ value is the text between the quote marks, with whitespace
+ preserved and quoted pairs decoded.
+ """
+ if value[0] != '"':
+ raise errors.HeaderParseError(
+ "expected '\"' but found '{}'".format(value))
+ bare_quoted_string = BareQuotedString()
+ value = value[1:]
+ if value and value[0] == '"':
+ token, value = get_qcontent(value)
+ bare_quoted_string.append(token)
+ while value and value[0] != '"':
+ if value[0] in WSP:
+ token, value = get_fws(value)
+ elif value[:2] == '=?':
+ valid_ew = False
+ try:
+ token, value = get_encoded_word(value)
+ bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
+ "encoded word inside quoted string"))
+ valid_ew = True
+ except errors.HeaderParseError:
+ token, value = get_qcontent(value)
+ # Collapse the whitespace between two encoded words that occur in a
+ # bare-quoted-string.
+ if valid_ew and len(bare_quoted_string) > 1:
+ if (bare_quoted_string[-1].token_type == 'fws' and
+ bare_quoted_string[-2].token_type == 'encoded-word'):
+ bare_quoted_string[-1] = EWWhiteSpaceTerminal(
+ bare_quoted_string[-1], 'fws')
+ else:
+ token, value = get_qcontent(value)
+ bare_quoted_string.append(token)
+ if not value:
+ bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
+ "end of header inside quoted string"))
+ return bare_quoted_string, value
+ return bare_quoted_string, value[1:]
+
+def get_comment(value):
+ """comment = "(" *([FWS] ccontent) [FWS] ")"
+ ccontent = ctext / quoted-pair / comment
+
+ We handle nested comments here, and quoted-pair in our qp-ctext routine.
+ """
+ if value and value[0] != '(':
+ raise errors.HeaderParseError(
+ "expected '(' but found '{}'".format(value))
+ comment = Comment()
+ value = value[1:]
+ while value and value[0] != ")":
+ if value[0] in WSP:
+ token, value = get_fws(value)
+ elif value[0] == '(':
+ token, value = get_comment(value)
+ else:
+ token, value = get_qp_ctext(value)
+ comment.append(token)
+ if not value:
+ comment.defects.append(errors.InvalidHeaderDefect(
+ "end of header inside comment"))
+ return comment, value
+ return comment, value[1:]
+
+def get_cfws(value):
+ """CFWS = (1*([FWS] comment) [FWS]) / FWS
+
+ """
+ cfws = CFWSList()
+ while value and value[0] in CFWS_LEADER:
+ if value[0] in WSP:
+ token, value = get_fws(value)
+ else:
+ token, value = get_comment(value)
+ cfws.append(token)
+ return cfws, value
+
+def get_quoted_string(value):
+ """quoted-string = [CFWS] [CFWS]
+
+ 'bare-quoted-string' is an intermediate class defined by this
+ parser and not by the RFC grammar. It is the quoted string
+ without any attached CFWS.
+ """
+ quoted_string = QuotedString()
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ quoted_string.append(token)
+ token, value = get_bare_quoted_string(value)
+ quoted_string.append(token)
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ quoted_string.append(token)
+ return quoted_string, value
+
+def get_atom(value):
+ """atom = [CFWS] 1*atext [CFWS]
+
+ An atom could be an rfc2047 encoded word.
+ """
+ atom = Atom()
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ atom.append(token)
+ if value and value[0] in ATOM_ENDS:
+ raise errors.HeaderParseError(
+ "expected atom but found '{}'".format(value))
+ if value.startswith('=?'):
+ try:
+ token, value = get_encoded_word(value)
+ except errors.HeaderParseError:
+ # XXX: need to figure out how to register defects when
+ # appropriate here.
+ token, value = get_atext(value)
+ else:
+ token, value = get_atext(value)
+ atom.append(token)
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ atom.append(token)
+ return atom, value
+
+def get_dot_atom_text(value):
+ """ dot-text = 1*atext *("." 1*atext)
+
+ """
+ dot_atom_text = DotAtomText()
+ if not value or value[0] in ATOM_ENDS:
+ raise errors.HeaderParseError("expected atom at a start of "
+ "dot-atom-text but found '{}'".format(value))
+ while value and value[0] not in ATOM_ENDS:
+ token, value = get_atext(value)
+ dot_atom_text.append(token)
+ if value and value[0] == '.':
+ dot_atom_text.append(DOT)
+ value = value[1:]
+ if dot_atom_text[-1] is DOT:
+ raise errors.HeaderParseError("expected atom at end of dot-atom-text "
+ "but found '{}'".format('.'+value))
+ return dot_atom_text, value
+
+def get_dot_atom(value):
+ """ dot-atom = [CFWS] dot-atom-text [CFWS]
+
+ Any place we can have a dot atom, we could instead have an rfc2047 encoded
+ word.
+ """
+ dot_atom = DotAtom()
+ if value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ dot_atom.append(token)
+ if value.startswith('=?'):
+ try:
+ token, value = get_encoded_word(value)
+ except errors.HeaderParseError:
+ # XXX: need to figure out how to register defects when
+ # appropriate here.
+ token, value = get_dot_atom_text(value)
+ else:
+ token, value = get_dot_atom_text(value)
+ dot_atom.append(token)
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ dot_atom.append(token)
+ return dot_atom, value
+
+def get_word(value):
+ """word = atom / quoted-string
+
+ Either atom or quoted-string may start with CFWS. We have to peel off this
+ CFWS first to determine which type of word to parse. Afterward we splice
+ the leading CFWS, if any, into the parsed sub-token.
+
+ If neither an atom or a quoted-string is found before the next special, a
+ HeaderParseError is raised.
+
+ The token returned is either an Atom or a QuotedString, as appropriate.
+ This means the 'word' level of the formal grammar is not represented in the
+ parse tree; this is because having that extra layer when manipulating the
+ parse tree is more confusing than it is helpful.
+
+ """
+ if value[0] in CFWS_LEADER:
+ leader, value = get_cfws(value)
+ else:
+ leader = None
+ if not value:
+ raise errors.HeaderParseError(
+ "Expected 'atom' or 'quoted-string' but found nothing.")
+ if value[0]=='"':
+ token, value = get_quoted_string(value)
+ elif value[0] in SPECIALS:
+ raise errors.HeaderParseError("Expected 'atom' or 'quoted-string' "
+ "but found '{}'".format(value))
+ else:
+ token, value = get_atom(value)
+ if leader is not None:
+ token[:0] = [leader]
+ return token, value
+
+def get_phrase(value):
+ """ phrase = 1*word / obs-phrase
+ obs-phrase = word *(word / "." / CFWS)
+
+ This means a phrase can be a sequence of words, periods, and CFWS in any
+ order as long as it starts with at least one word. If anything other than
+ words is detected, an ObsoleteHeaderDefect is added to the token's defect
+ list. We also accept a phrase that starts with CFWS followed by a dot;
+ this is registered as an InvalidHeaderDefect, since it is not supported by
+ even the obsolete grammar.
+
+ """
+ phrase = Phrase()
+ try:
+ token, value = get_word(value)
+ phrase.append(token)
+ except errors.HeaderParseError:
+ phrase.defects.append(errors.InvalidHeaderDefect(
+ "phrase does not start with word"))
+ while value and value[0] not in PHRASE_ENDS:
+ if value[0]=='.':
+ phrase.append(DOT)
+ phrase.defects.append(errors.ObsoleteHeaderDefect(
+ "period in 'phrase'"))
+ value = value[1:]
+ else:
+ try:
+ token, value = get_word(value)
+ except errors.HeaderParseError:
+ if value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ phrase.defects.append(errors.ObsoleteHeaderDefect(
+ "comment found without atom"))
+ else:
+ raise
+ phrase.append(token)
+ return phrase, value
+
+def get_local_part(value):
+ """ local-part = dot-atom / quoted-string / obs-local-part
+
+ """
+ local_part = LocalPart()
+ leader = None
+ if value[0] in CFWS_LEADER:
+ leader, value = get_cfws(value)
+ if not value:
+ raise errors.HeaderParseError(
+ "expected local-part but found '{}'".format(value))
+ try:
+ token, value = get_dot_atom(value)
+ except errors.HeaderParseError:
+ try:
+ token, value = get_word(value)
+ except errors.HeaderParseError:
+ if value[0] != '\\' and value[0] in PHRASE_ENDS:
+ raise
+ token = TokenList()
+ if leader is not None:
+ token[:0] = [leader]
+ local_part.append(token)
+ if value and (value[0]=='\\' or value[0] not in PHRASE_ENDS):
+ obs_local_part, value = get_obs_local_part(str(local_part) + value)
+ if obs_local_part.token_type == 'invalid-obs-local-part':
+ local_part.defects.append(errors.InvalidHeaderDefect(
+ "local-part is not dot-atom, quoted-string, or obs-local-part"))
+ else:
+ local_part.defects.append(errors.ObsoleteHeaderDefect(
+ "local-part is not a dot-atom (contains CFWS)"))
+ local_part[0] = obs_local_part
+ try:
+ local_part.value.encode('ascii')
+ except UnicodeEncodeError:
+ local_part.defects.append(errors.NonASCIILocalPartDefect(
+ "local-part contains non-ASCII characters)"))
+ return local_part, value
+
+def get_obs_local_part(value):
+ """ obs-local-part = word *("." word)
+ """
+ obs_local_part = ObsLocalPart()
+ last_non_ws_was_dot = False
+ while value and (value[0]=='\\' or value[0] not in PHRASE_ENDS):
+ if value[0] == '.':
+ if last_non_ws_was_dot:
+ obs_local_part.defects.append(errors.InvalidHeaderDefect(
+ "invalid repeated '.'"))
+ obs_local_part.append(DOT)
+ last_non_ws_was_dot = True
+ value = value[1:]
+ continue
+ elif value[0]=='\\':
+ obs_local_part.append(ValueTerminal(value[0],
+ 'misplaced-special'))
+ value = value[1:]
+ obs_local_part.defects.append(errors.InvalidHeaderDefect(
+ "'\\' character outside of quoted-string/ccontent"))
+ last_non_ws_was_dot = False
+ continue
+ if obs_local_part and obs_local_part[-1].token_type != 'dot':
+ obs_local_part.defects.append(errors.InvalidHeaderDefect(
+ "missing '.' between words"))
+ try:
+ token, value = get_word(value)
+ last_non_ws_was_dot = False
+ except errors.HeaderParseError:
+ if value[0] not in CFWS_LEADER:
+ raise
+ token, value = get_cfws(value)
+ obs_local_part.append(token)
+ if (obs_local_part[0].token_type == 'dot' or
+ obs_local_part[0].token_type=='cfws' and
+ obs_local_part[1].token_type=='dot'):
+ obs_local_part.defects.append(errors.InvalidHeaderDefect(
+ "Invalid leading '.' in local part"))
+ if (obs_local_part[-1].token_type == 'dot' or
+ obs_local_part[-1].token_type=='cfws' and
+ obs_local_part[-2].token_type=='dot'):
+ obs_local_part.defects.append(errors.InvalidHeaderDefect(
+ "Invalid trailing '.' in local part"))
+ if obs_local_part.defects:
+ obs_local_part.token_type = 'invalid-obs-local-part'
+ return obs_local_part, value
+
+def get_dtext(value):
+ r""" dtext = / obs-dtext
+ obs-dtext = obs-NO-WS-CTL / quoted-pair
+
+ We allow anything except the excluded characters, but if we find any
+ ASCII other than the RFC defined printable ASCII, a NonPrintableDefect is
+ added to the token's defects list. Quoted pairs are converted to their
+ unquoted values, so what is returned is a ptext token, in this case a
+ ValueTerminal. If there were quoted-printables, an ObsoleteHeaderDefect is
+ added to the returned token's defect list.
+
+ """
+ ptext, value, had_qp = _get_ptext_to_endchars(value, '[]')
+ ptext = ValueTerminal(ptext, 'ptext')
+ if had_qp:
+ ptext.defects.append(errors.ObsoleteHeaderDefect(
+ "quoted printable found in domain-literal"))
+ _validate_xtext(ptext)
+ return ptext, value
+
+def _check_for_early_dl_end(value, domain_literal):
+ if value:
+ return False
+ domain_literal.append(errors.InvalidHeaderDefect(
+ "end of input inside domain-literal"))
+ domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
+ return True
+
+def get_domain_literal(value):
+ """ domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
+
+ """
+ domain_literal = DomainLiteral()
+ if value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ domain_literal.append(token)
+ if not value:
+ raise errors.HeaderParseError("expected domain-literal")
+ if value[0] != '[':
+ raise errors.HeaderParseError("expected '[' at start of domain-literal "
+ "but found '{}'".format(value))
+ value = value[1:]
+ if _check_for_early_dl_end(value, domain_literal):
+ return domain_literal, value
+ domain_literal.append(ValueTerminal('[', 'domain-literal-start'))
+ if value[0] in WSP:
+ token, value = get_fws(value)
+ domain_literal.append(token)
+ token, value = get_dtext(value)
+ domain_literal.append(token)
+ if _check_for_early_dl_end(value, domain_literal):
+ return domain_literal, value
+ if value[0] in WSP:
+ token, value = get_fws(value)
+ domain_literal.append(token)
+ if _check_for_early_dl_end(value, domain_literal):
+ return domain_literal, value
+ if value[0] != ']':
+ raise errors.HeaderParseError("expected ']' at end of domain-literal "
+ "but found '{}'".format(value))
+ domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
+ value = value[1:]
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ domain_literal.append(token)
+ return domain_literal, value
+
+def get_domain(value):
+ """ domain = dot-atom / domain-literal / obs-domain
+ obs-domain = atom *("." atom))
+
+ """
+ domain = Domain()
+ leader = None
+ if value[0] in CFWS_LEADER:
+ leader, value = get_cfws(value)
+ if not value:
+ raise errors.HeaderParseError(
+ "expected domain but found '{}'".format(value))
+ if value[0] == '[':
+ token, value = get_domain_literal(value)
+ if leader is not None:
+ token[:0] = [leader]
+ domain.append(token)
+ return domain, value
+ try:
+ token, value = get_dot_atom(value)
+ except errors.HeaderParseError:
+ token, value = get_atom(value)
+ if value and value[0] == '@':
+ raise errors.HeaderParseError('Invalid Domain')
+ if leader is not None:
+ token[:0] = [leader]
+ domain.append(token)
+ if value and value[0] == '.':
+ domain.defects.append(errors.ObsoleteHeaderDefect(
+ "domain is not a dot-atom (contains CFWS)"))
+ if domain[0].token_type == 'dot-atom':
+ domain[:] = domain[0]
+ while value and value[0] == '.':
+ domain.append(DOT)
+ token, value = get_atom(value[1:])
+ domain.append(token)
+ return domain, value
+
+def get_addr_spec(value):
+ """ addr-spec = local-part "@" domain
+
+ """
+ addr_spec = AddrSpec()
+ token, value = get_local_part(value)
+ addr_spec.append(token)
+ if not value or value[0] != '@':
+ addr_spec.defects.append(errors.InvalidHeaderDefect(
+ "addr-spec local part with no domain"))
+ return addr_spec, value
+ addr_spec.append(ValueTerminal('@', 'address-at-symbol'))
+ token, value = get_domain(value[1:])
+ addr_spec.append(token)
+ return addr_spec, value
+
+def get_obs_route(value):
+ """ obs-route = obs-domain-list ":"
+ obs-domain-list = *(CFWS / ",") "@" domain *("," [CFWS] ["@" domain])
+
+ Returns an obs-route token with the appropriate sub-tokens (that is,
+ there is no obs-domain-list in the parse tree).
+ """
+ obs_route = ObsRoute()
+ while value and (value[0]==',' or value[0] in CFWS_LEADER):
+ if value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ obs_route.append(token)
+ elif value[0] == ',':
+ obs_route.append(ListSeparator)
+ value = value[1:]
+ if not value or value[0] != '@':
+ raise errors.HeaderParseError(
+ "expected obs-route domain but found '{}'".format(value))
+ obs_route.append(RouteComponentMarker)
+ token, value = get_domain(value[1:])
+ obs_route.append(token)
+ while value and value[0]==',':
+ obs_route.append(ListSeparator)
+ value = value[1:]
+ if not value:
+ break
+ if value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ obs_route.append(token)
+ if value[0] == '@':
+ obs_route.append(RouteComponentMarker)
+ token, value = get_domain(value[1:])
+ obs_route.append(token)
+ if not value:
+ raise errors.HeaderParseError("end of header while parsing obs-route")
+ if value[0] != ':':
+ raise errors.HeaderParseError( "expected ':' marking end of "
+ "obs-route but found '{}'".format(value))
+ obs_route.append(ValueTerminal(':', 'end-of-obs-route-marker'))
+ return obs_route, value[1:]
+
+def get_angle_addr(value):
+ """ angle-addr = [CFWS] "<" addr-spec ">" [CFWS] / obs-angle-addr
+ obs-angle-addr = [CFWS] "<" obs-route addr-spec ">" [CFWS]
+
+ """
+ angle_addr = AngleAddr()
+ if value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ angle_addr.append(token)
+ if not value or value[0] != '<':
+ raise errors.HeaderParseError(
+ "expected angle-addr but found '{}'".format(value))
+ angle_addr.append(ValueTerminal('<', 'angle-addr-start'))
+ value = value[1:]
+ # Although it is not legal per RFC5322, SMTP uses '<>' in certain
+ # circumstances.
+ if value[0] == '>':
+ angle_addr.append(ValueTerminal('>', 'angle-addr-end'))
+ angle_addr.defects.append(errors.InvalidHeaderDefect(
+ "null addr-spec in angle-addr"))
+ value = value[1:]
+ return angle_addr, value
+ try:
+ token, value = get_addr_spec(value)
+ except errors.HeaderParseError:
+ try:
+ token, value = get_obs_route(value)
+ angle_addr.defects.append(errors.ObsoleteHeaderDefect(
+ "obsolete route specification in angle-addr"))
+ except errors.HeaderParseError:
+ raise errors.HeaderParseError(
+ "expected addr-spec or obs-route but found '{}'".format(value))
+ angle_addr.append(token)
+ token, value = get_addr_spec(value)
+ angle_addr.append(token)
+ if value and value[0] == '>':
+ value = value[1:]
+ else:
+ angle_addr.defects.append(errors.InvalidHeaderDefect(
+ "missing trailing '>' on angle-addr"))
+ angle_addr.append(ValueTerminal('>', 'angle-addr-end'))
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ angle_addr.append(token)
+ return angle_addr, value
+
+def get_display_name(value):
+ """ display-name = phrase
+
+ Because this is simply a name-rule, we don't return a display-name
+ token containing a phrase, but rather a display-name token with
+ the content of the phrase.
+
+ """
+ display_name = DisplayName()
+ token, value = get_phrase(value)
+ display_name.extend(token[:])
+ display_name.defects = token.defects[:]
+ return display_name, value
+
+
+def get_name_addr(value):
+ """ name-addr = [display-name] angle-addr
+
+ """
+ name_addr = NameAddr()
+ # Both the optional display name and the angle-addr can start with cfws.
+ leader = None
+ if value[0] in CFWS_LEADER:
+ leader, value = get_cfws(value)
+ if not value:
+ raise errors.HeaderParseError(
+ "expected name-addr but found '{}'".format(leader))
+ if value[0] != '<':
+ if value[0] in PHRASE_ENDS:
+ raise errors.HeaderParseError(
+ "expected name-addr but found '{}'".format(value))
+ token, value = get_display_name(value)
+ if not value:
+ raise errors.HeaderParseError(
+ "expected name-addr but found '{}'".format(token))
+ if leader is not None:
+ token[0][:0] = [leader]
+ leader = None
+ name_addr.append(token)
+ token, value = get_angle_addr(value)
+ if leader is not None:
+ token[:0] = [leader]
+ name_addr.append(token)
+ return name_addr, value
+
+def get_mailbox(value):
+ """ mailbox = name-addr / addr-spec
+
+ """
+ # The only way to figure out if we are dealing with a name-addr or an
+ # addr-spec is to try parsing each one.
+ mailbox = Mailbox()
+ try:
+ token, value = get_name_addr(value)
+ except errors.HeaderParseError:
+ try:
+ token, value = get_addr_spec(value)
+ except errors.HeaderParseError:
+ raise errors.HeaderParseError(
+ "expected mailbox but found '{}'".format(value))
+ if any(isinstance(x, errors.InvalidHeaderDefect)
+ for x in token.all_defects):
+ mailbox.token_type = 'invalid-mailbox'
+ mailbox.append(token)
+ return mailbox, value
+
+def get_invalid_mailbox(value, endchars):
+ """ Read everything up to one of the chars in endchars.
+
+ This is outside the formal grammar. The InvalidMailbox TokenList that is
+ returned acts like a Mailbox, but the data attributes are None.
+
+ """
+ invalid_mailbox = InvalidMailbox()
+ while value and value[0] not in endchars:
+ if value[0] in PHRASE_ENDS:
+ invalid_mailbox.append(ValueTerminal(value[0],
+ 'misplaced-special'))
+ value = value[1:]
+ else:
+ token, value = get_phrase(value)
+ invalid_mailbox.append(token)
+ return invalid_mailbox, value
+
+def get_mailbox_list(value):
+ """ mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list
+ obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS])
+
+ For this routine we go outside the formal grammar in order to improve error
+ handling. We recognize the end of the mailbox list only at the end of the
+ value or at a ';' (the group terminator). This is so that we can turn
+ invalid mailboxes into InvalidMailbox tokens and continue parsing any
+ remaining valid mailboxes. We also allow all mailbox entries to be null,
+ and this condition is handled appropriately at a higher level.
+
+ """
+ mailbox_list = MailboxList()
+ while value and value[0] != ';':
+ try:
+ token, value = get_mailbox(value)
+ mailbox_list.append(token)
+ except errors.HeaderParseError:
+ leader = None
+ if value[0] in CFWS_LEADER:
+ leader, value = get_cfws(value)
+ if not value or value[0] in ',;':
+ mailbox_list.append(leader)
+ mailbox_list.defects.append(errors.ObsoleteHeaderDefect(
+ "empty element in mailbox-list"))
+ else:
+ token, value = get_invalid_mailbox(value, ',;')
+ if leader is not None:
+ token[:0] = [leader]
+ mailbox_list.append(token)
+ mailbox_list.defects.append(errors.InvalidHeaderDefect(
+ "invalid mailbox in mailbox-list"))
+ elif value[0] == ',':
+ mailbox_list.defects.append(errors.ObsoleteHeaderDefect(
+ "empty element in mailbox-list"))
+ else:
+ token, value = get_invalid_mailbox(value, ',;')
+ if leader is not None:
+ token[:0] = [leader]
+ mailbox_list.append(token)
+ mailbox_list.defects.append(errors.InvalidHeaderDefect(
+ "invalid mailbox in mailbox-list"))
+ if value and value[0] not in ',;':
+ # Crap after mailbox; treat it as an invalid mailbox.
+ # The mailbox info will still be available.
+ mailbox = mailbox_list[-1]
+ mailbox.token_type = 'invalid-mailbox'
+ token, value = get_invalid_mailbox(value, ',;')
+ mailbox.extend(token)
+ mailbox_list.defects.append(errors.InvalidHeaderDefect(
+ "invalid mailbox in mailbox-list"))
+ if value and value[0] == ',':
+ mailbox_list.append(ListSeparator)
+ value = value[1:]
+ return mailbox_list, value
+
+
+def get_group_list(value):
+ """ group-list = mailbox-list / CFWS / obs-group-list
+ obs-group-list = 1*([CFWS] ",") [CFWS]
+
+ """
+ group_list = GroupList()
+ if not value:
+ group_list.defects.append(errors.InvalidHeaderDefect(
+ "end of header before group-list"))
+ return group_list, value
+ leader = None
+ if value and value[0] in CFWS_LEADER:
+ leader, value = get_cfws(value)
+ if not value:
+ # This should never happen in email parsing, since CFWS-only is a
+ # legal alternative to group-list in a group, which is the only
+ # place group-list appears.
+ group_list.defects.append(errors.InvalidHeaderDefect(
+ "end of header in group-list"))
+ group_list.append(leader)
+ return group_list, value
+ if value[0] == ';':
+ group_list.append(leader)
+ return group_list, value
+ token, value = get_mailbox_list(value)
+ if len(token.all_mailboxes)==0:
+ if leader is not None:
+ group_list.append(leader)
+ group_list.extend(token)
+ group_list.defects.append(errors.ObsoleteHeaderDefect(
+ "group-list with empty entries"))
+ return group_list, value
+ if leader is not None:
+ token[:0] = [leader]
+ group_list.append(token)
+ return group_list, value
+
+def get_group(value):
+ """ group = display-name ":" [group-list] ";" [CFWS]
+
+ """
+ group = Group()
+ token, value = get_display_name(value)
+ if not value or value[0] != ':':
+ raise errors.HeaderParseError("expected ':' at end of group "
+ "display name but found '{}'".format(value))
+ group.append(token)
+ group.append(ValueTerminal(':', 'group-display-name-terminator'))
+ value = value[1:]
+ if value and value[0] == ';':
+ group.append(ValueTerminal(';', 'group-terminator'))
+ return group, value[1:]
+ token, value = get_group_list(value)
+ group.append(token)
+ if not value:
+ group.defects.append(errors.InvalidHeaderDefect(
+ "end of header in group"))
+ elif value[0] != ';':
+ raise errors.HeaderParseError(
+ "expected ';' at end of group but found {}".format(value))
+ group.append(ValueTerminal(';', 'group-terminator'))
+ value = value[1:]
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ group.append(token)
+ return group, value
+
+def get_address(value):
+ """ address = mailbox / group
+
+ Note that counter-intuitively, an address can be either a single address or
+ a list of addresses (a group). This is why the returned Address object has
+ a 'mailboxes' attribute which treats a single address as a list of length
+ one. When you need to differentiate between to two cases, extract the single
+ element, which is either a mailbox or a group token.
+
+ """
+ # The formal grammar isn't very helpful when parsing an address. mailbox
+ # and group, especially when allowing for obsolete forms, start off very
+ # similarly. It is only when you reach one of @, <, or : that you know
+ # what you've got. So, we try each one in turn, starting with the more
+ # likely of the two. We could perhaps make this more efficient by looking
+ # for a phrase and then branching based on the next character, but that
+ # would be a premature optimization.
+ address = Address()
+ try:
+ token, value = get_group(value)
+ except errors.HeaderParseError:
+ try:
+ token, value = get_mailbox(value)
+ except errors.HeaderParseError:
+ raise errors.HeaderParseError(
+ "expected address but found '{}'".format(value))
+ address.append(token)
+ return address, value
+
+def get_address_list(value):
+ """ address_list = (address *("," address)) / obs-addr-list
+ obs-addr-list = *([CFWS] ",") address *("," [address / CFWS])
+
+ We depart from the formal grammar here by continuing to parse until the end
+ of the input, assuming the input to be entirely composed of an
+ address-list. This is always true in email parsing, and allows us
+ to skip invalid addresses to parse additional valid ones.
+
+ """
+ address_list = AddressList()
+ while value:
+ try:
+ token, value = get_address(value)
+ address_list.append(token)
+ except errors.HeaderParseError as err:
+ leader = None
+ if value[0] in CFWS_LEADER:
+ leader, value = get_cfws(value)
+ if not value or value[0] == ',':
+ address_list.append(leader)
+ address_list.defects.append(errors.ObsoleteHeaderDefect(
+ "address-list entry with no content"))
+ else:
+ token, value = get_invalid_mailbox(value, ',')
+ if leader is not None:
+ token[:0] = [leader]
+ address_list.append(Address([token]))
+ address_list.defects.append(errors.InvalidHeaderDefect(
+ "invalid address in address-list"))
+ elif value[0] == ',':
+ address_list.defects.append(errors.ObsoleteHeaderDefect(
+ "empty element in address-list"))
+ else:
+ token, value = get_invalid_mailbox(value, ',')
+ if leader is not None:
+ token[:0] = [leader]
+ address_list.append(Address([token]))
+ address_list.defects.append(errors.InvalidHeaderDefect(
+ "invalid address in address-list"))
+ if value and value[0] != ',':
+ # Crap after address; treat it as an invalid mailbox.
+ # The mailbox info will still be available.
+ mailbox = address_list[-1][0]
+ mailbox.token_type = 'invalid-mailbox'
+ token, value = get_invalid_mailbox(value, ',')
+ mailbox.extend(token)
+ address_list.defects.append(errors.InvalidHeaderDefect(
+ "invalid address in address-list"))
+ if value: # Must be a , at this point.
+ address_list.append(ValueTerminal(',', 'list-separator'))
+ value = value[1:]
+ return address_list, value
+
+
+def get_no_fold_literal(value):
+ """ no-fold-literal = "[" *dtext "]"
+ """
+ no_fold_literal = NoFoldLiteral()
+ if not value:
+ raise errors.HeaderParseError(
+ "expected no-fold-literal but found '{}'".format(value))
+ if value[0] != '[':
+ raise errors.HeaderParseError(
+ "expected '[' at the start of no-fold-literal "
+ "but found '{}'".format(value))
+ no_fold_literal.append(ValueTerminal('[', 'no-fold-literal-start'))
+ value = value[1:]
+ token, value = get_dtext(value)
+ no_fold_literal.append(token)
+ if not value or value[0] != ']':
+ raise errors.HeaderParseError(
+ "expected ']' at the end of no-fold-literal "
+ "but found '{}'".format(value))
+ no_fold_literal.append(ValueTerminal(']', 'no-fold-literal-end'))
+ return no_fold_literal, value[1:]
+
+def get_msg_id(value):
+ """msg-id = [CFWS] "<" id-left '@' id-right ">" [CFWS]
+ id-left = dot-atom-text / obs-id-left
+ id-right = dot-atom-text / no-fold-literal / obs-id-right
+ no-fold-literal = "[" *dtext "]"
+ """
+ msg_id = MsgID()
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ msg_id.append(token)
+ if not value or value[0] != '<':
+ raise errors.HeaderParseError(
+ "expected msg-id but found '{}'".format(value))
+ msg_id.append(ValueTerminal('<', 'msg-id-start'))
+ value = value[1:]
+ # Parse id-left.
+ try:
+ token, value = get_dot_atom_text(value)
+ except errors.HeaderParseError:
+ try:
+ # obs-id-left is same as local-part of add-spec.
+ token, value = get_obs_local_part(value)
+ msg_id.defects.append(errors.ObsoleteHeaderDefect(
+ "obsolete id-left in msg-id"))
+ except errors.HeaderParseError:
+ raise errors.HeaderParseError(
+ "expected dot-atom-text or obs-id-left"
+ " but found '{}'".format(value))
+ msg_id.append(token)
+ if not value or value[0] != '@':
+ msg_id.defects.append(errors.InvalidHeaderDefect(
+ "msg-id with no id-right"))
+ # Even though there is no id-right, if the local part
+ # ends with `>` let's just parse it too and return
+ # along with the defect.
+ if value and value[0] == '>':
+ msg_id.append(ValueTerminal('>', 'msg-id-end'))
+ value = value[1:]
+ return msg_id, value
+ msg_id.append(ValueTerminal('@', 'address-at-symbol'))
+ value = value[1:]
+ # Parse id-right.
+ try:
+ token, value = get_dot_atom_text(value)
+ except errors.HeaderParseError:
+ try:
+ token, value = get_no_fold_literal(value)
+ except errors.HeaderParseError as e:
+ try:
+ token, value = get_domain(value)
+ msg_id.defects.append(errors.ObsoleteHeaderDefect(
+ "obsolete id-right in msg-id"))
+ except errors.HeaderParseError:
+ raise errors.HeaderParseError(
+ "expected dot-atom-text, no-fold-literal or obs-id-right"
+ " but found '{}'".format(value))
+ msg_id.append(token)
+ if value and value[0] == '>':
+ value = value[1:]
+ else:
+ msg_id.defects.append(errors.InvalidHeaderDefect(
+ "missing trailing '>' on msg-id"))
+ msg_id.append(ValueTerminal('>', 'msg-id-end'))
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ msg_id.append(token)
+ return msg_id, value
+
+
+def parse_message_id(value):
+ """message-id = "Message-ID:" msg-id CRLF
+ """
+ message_id = MessageID()
+ try:
+ token, value = get_msg_id(value)
+ message_id.append(token)
+ except errors.HeaderParseError as ex:
+ token = get_unstructured(value)
+ message_id = InvalidMessageID(token)
+ message_id.defects.append(
+ errors.InvalidHeaderDefect("Invalid msg-id: {!r}".format(ex)))
+ else:
+ # Value after parsing a valid msg_id should be None.
+ if value:
+ message_id.defects.append(errors.InvalidHeaderDefect(
+ "Unexpected {!r}".format(value)))
+
+ return message_id
+
+#
+# XXX: As I begin to add additional header parsers, I'm realizing we probably
+# have two level of parser routines: the get_XXX methods that get a token in
+# the grammar, and parse_XXX methods that parse an entire field value. So
+# get_address_list above should really be a parse_ method, as probably should
+# be get_unstructured.
+#
+
+def parse_mime_version(value):
+ """ mime-version = [CFWS] 1*digit [CFWS] "." [CFWS] 1*digit [CFWS]
+
+ """
+ # The [CFWS] is implicit in the RFC 2045 BNF.
+ # XXX: This routine is a bit verbose, should factor out a get_int method.
+ mime_version = MIMEVersion()
+ if not value:
+ mime_version.defects.append(errors.HeaderMissingRequiredValue(
+ "Missing MIME version number (eg: 1.0)"))
+ return mime_version
+ if value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ mime_version.append(token)
+ if not value:
+ mime_version.defects.append(errors.HeaderMissingRequiredValue(
+ "Expected MIME version number but found only CFWS"))
+ digits = ''
+ while value and value[0] != '.' and value[0] not in CFWS_LEADER:
+ digits += value[0]
+ value = value[1:]
+ if not digits.isdigit():
+ mime_version.defects.append(errors.InvalidHeaderDefect(
+ "Expected MIME major version number but found {!r}".format(digits)))
+ mime_version.append(ValueTerminal(digits, 'xtext'))
+ else:
+ mime_version.major = int(digits)
+ mime_version.append(ValueTerminal(digits, 'digits'))
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ mime_version.append(token)
+ if not value or value[0] != '.':
+ if mime_version.major is not None:
+ mime_version.defects.append(errors.InvalidHeaderDefect(
+ "Incomplete MIME version; found only major number"))
+ if value:
+ mime_version.append(ValueTerminal(value, 'xtext'))
+ return mime_version
+ mime_version.append(ValueTerminal('.', 'version-separator'))
+ value = value[1:]
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ mime_version.append(token)
+ if not value:
+ if mime_version.major is not None:
+ mime_version.defects.append(errors.InvalidHeaderDefect(
+ "Incomplete MIME version; found only major number"))
+ return mime_version
+ digits = ''
+ while value and value[0] not in CFWS_LEADER:
+ digits += value[0]
+ value = value[1:]
+ if not digits.isdigit():
+ mime_version.defects.append(errors.InvalidHeaderDefect(
+ "Expected MIME minor version number but found {!r}".format(digits)))
+ mime_version.append(ValueTerminal(digits, 'xtext'))
+ else:
+ mime_version.minor = int(digits)
+ mime_version.append(ValueTerminal(digits, 'digits'))
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ mime_version.append(token)
+ if value:
+ mime_version.defects.append(errors.InvalidHeaderDefect(
+ "Excess non-CFWS text after MIME version"))
+ mime_version.append(ValueTerminal(value, 'xtext'))
+ return mime_version
+
+def get_invalid_parameter(value):
+ """ Read everything up to the next ';'.
+
+ This is outside the formal grammar. The InvalidParameter TokenList that is
+ returned acts like a Parameter, but the data attributes are None.
+
+ """
+ invalid_parameter = InvalidParameter()
+ while value and value[0] != ';':
+ if value[0] in PHRASE_ENDS:
+ invalid_parameter.append(ValueTerminal(value[0],
+ 'misplaced-special'))
+ value = value[1:]
+ else:
+ token, value = get_phrase(value)
+ invalid_parameter.append(token)
+ return invalid_parameter, value
+
+def get_ttext(value):
+ """ttext =
+
+ We allow any non-TOKEN_ENDS in ttext, but add defects to the token's
+ defects list if we find non-ttext characters. We also register defects for
+ *any* non-printables even though the RFC doesn't exclude all of them,
+ because we follow the spirit of RFC 5322.
+
+ """
+ m = _non_token_end_matcher(value)
+ if not m:
+ raise errors.HeaderParseError(
+ "expected ttext but found '{}'".format(value))
+ ttext = m.group()
+ value = value[len(ttext):]
+ ttext = ValueTerminal(ttext, 'ttext')
+ _validate_xtext(ttext)
+ return ttext, value
+
+def get_token(value):
+ """token = [CFWS] 1*ttext [CFWS]
+
+ The RFC equivalent of ttext is any US-ASCII chars except space, ctls, or
+ tspecials. We also exclude tabs even though the RFC doesn't.
+
+ The RFC implies the CFWS but is not explicit about it in the BNF.
+
+ """
+ mtoken = Token()
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ mtoken.append(token)
+ if value and value[0] in TOKEN_ENDS:
+ raise errors.HeaderParseError(
+ "expected token but found '{}'".format(value))
+ token, value = get_ttext(value)
+ mtoken.append(token)
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ mtoken.append(token)
+ return mtoken, value
+
+def get_attrtext(value):
+ """attrtext = 1*(any non-ATTRIBUTE_ENDS character)
+
+ We allow any non-ATTRIBUTE_ENDS in attrtext, but add defects to the
+ token's defects list if we find non-attrtext characters. We also register
+ defects for *any* non-printables even though the RFC doesn't exclude all of
+ them, because we follow the spirit of RFC 5322.
+
+ """
+ m = _non_attribute_end_matcher(value)
+ if not m:
+ raise errors.HeaderParseError(
+ "expected attrtext but found {!r}".format(value))
+ attrtext = m.group()
+ value = value[len(attrtext):]
+ attrtext = ValueTerminal(attrtext, 'attrtext')
+ _validate_xtext(attrtext)
+ return attrtext, value
+
+def get_attribute(value):
+ """ [CFWS] 1*attrtext [CFWS]
+
+ This version of the BNF makes the CFWS explicit, and as usual we use a
+ value terminal for the actual run of characters. The RFC equivalent of
+ attrtext is the token characters, with the subtraction of '*', "'", and '%'.
+ We include tab in the excluded set just as we do for token.
+
+ """
+ attribute = Attribute()
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ attribute.append(token)
+ if value and value[0] in ATTRIBUTE_ENDS:
+ raise errors.HeaderParseError(
+ "expected token but found '{}'".format(value))
+ token, value = get_attrtext(value)
+ attribute.append(token)
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ attribute.append(token)
+ return attribute, value
+
+def get_extended_attrtext(value):
+ """attrtext = 1*(any non-ATTRIBUTE_ENDS character plus '%')
+
+ This is a special parsing routine so that we get a value that
+ includes % escapes as a single string (which we decode as a single
+ string later).
+
+ """
+ m = _non_extended_attribute_end_matcher(value)
+ if not m:
+ raise errors.HeaderParseError(
+ "expected extended attrtext but found {!r}".format(value))
+ attrtext = m.group()
+ value = value[len(attrtext):]
+ attrtext = ValueTerminal(attrtext, 'extended-attrtext')
+ _validate_xtext(attrtext)
+ return attrtext, value
+
+def get_extended_attribute(value):
+ """ [CFWS] 1*extended_attrtext [CFWS]
+
+ This is like the non-extended version except we allow % characters, so that
+ we can pick up an encoded value as a single string.
+
+ """
+ # XXX: should we have an ExtendedAttribute TokenList?
+ attribute = Attribute()
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ attribute.append(token)
+ if value and value[0] in EXTENDED_ATTRIBUTE_ENDS:
+ raise errors.HeaderParseError(
+ "expected token but found '{}'".format(value))
+ token, value = get_extended_attrtext(value)
+ attribute.append(token)
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ attribute.append(token)
+ return attribute, value
+
+def get_section(value):
+ """ '*' digits
+
+ The formal BNF is more complicated because leading 0s are not allowed. We
+ check for that and add a defect. We also assume no CFWS is allowed between
+ the '*' and the digits, though the RFC is not crystal clear on that.
+ The caller should already have dealt with leading CFWS.
+
+ """
+ section = Section()
+ if not value or value[0] != '*':
+ raise errors.HeaderParseError("Expected section but found {}".format(
+ value))
+ section.append(ValueTerminal('*', 'section-marker'))
+ value = value[1:]
+ if not value or not value[0].isdigit():
+ raise errors.HeaderParseError("Expected section number but "
+ "found {}".format(value))
+ digits = ''
+ while value and value[0].isdigit():
+ digits += value[0]
+ value = value[1:]
+ if digits[0] == '0' and digits != '0':
+ section.defects.append(errors.InvalidHeaderError(
+ "section number has an invalid leading 0"))
+ section.number = int(digits)
+ section.append(ValueTerminal(digits, 'digits'))
+ return section, value
+
+
+def get_value(value):
+ """ quoted-string / attribute
+
+ """
+ v = Value()
+ if not value:
+ raise errors.HeaderParseError("Expected value but found end of string")
+ leader = None
+ if value[0] in CFWS_LEADER:
+ leader, value = get_cfws(value)
+ if not value:
+ raise errors.HeaderParseError("Expected value but found "
+ "only {}".format(leader))
+ if value[0] == '"':
+ token, value = get_quoted_string(value)
+ else:
+ token, value = get_extended_attribute(value)
+ if leader is not None:
+ token[:0] = [leader]
+ v.append(token)
+ return v, value
+
+def get_parameter(value):
+ """ attribute [section] ["*"] [CFWS] "=" value
+
+ The CFWS is implied by the RFC but not made explicit in the BNF. This
+ simplified form of the BNF from the RFC is made to conform with the RFC BNF
+ through some extra checks. We do it this way because it makes both error
+ recovery and working with the resulting parse tree easier.
+ """
+ # It is possible CFWS would also be implicitly allowed between the section
+ # and the 'extended-attribute' marker (the '*') , but we've never seen that
+ # in the wild and we will therefore ignore the possibility.
+ param = Parameter()
+ token, value = get_attribute(value)
+ param.append(token)
+ if not value or value[0] == ';':
+ param.defects.append(errors.InvalidHeaderDefect("Parameter contains "
+ "name ({}) but no value".format(token)))
+ return param, value
+ if value[0] == '*':
+ try:
+ token, value = get_section(value)
+ param.sectioned = True
+ param.append(token)
+ except errors.HeaderParseError:
+ pass
+ if not value:
+ raise errors.HeaderParseError("Incomplete parameter")
+ if value[0] == '*':
+ param.append(ValueTerminal('*', 'extended-parameter-marker'))
+ value = value[1:]
+ param.extended = True
+ if value[0] != '=':
+ raise errors.HeaderParseError("Parameter not followed by '='")
+ param.append(ValueTerminal('=', 'parameter-separator'))
+ value = value[1:]
+ leader = None
+ if value and value[0] in CFWS_LEADER:
+ token, value = get_cfws(value)
+ param.append(token)
+ remainder = None
+ appendto = param
+ if param.extended and value and value[0] == '"':
+ # Now for some serious hackery to handle the common invalid case of
+ # double quotes around an extended value. We also accept (with defect)
+ # a value marked as encoded that isn't really.
+ qstring, remainder = get_quoted_string(value)
+ inner_value = qstring.stripped_value
+ semi_valid = False
+ if param.section_number == 0:
+ if inner_value and inner_value[0] == "'":
+ semi_valid = True
+ else:
+ token, rest = get_attrtext(inner_value)
+ if rest and rest[0] == "'":
+ semi_valid = True
+ else:
+ try:
+ token, rest = get_extended_attrtext(inner_value)
+ except:
+ pass
+ else:
+ if not rest:
+ semi_valid = True
+ if semi_valid:
+ param.defects.append(errors.InvalidHeaderDefect(
+ "Quoted string value for extended parameter is invalid"))
+ param.append(qstring)
+ for t in qstring:
+ if t.token_type == 'bare-quoted-string':
+ t[:] = []
+ appendto = t
+ break
+ value = inner_value
+ else:
+ remainder = None
+ param.defects.append(errors.InvalidHeaderDefect(
+ "Parameter marked as extended but appears to have a "
+ "quoted string value that is non-encoded"))
+ if value and value[0] == "'":
+ token = None
+ else:
+ token, value = get_value(value)
+ if not param.extended or param.section_number > 0:
+ if not value or value[0] != "'":
+ appendto.append(token)
+ if remainder is not None:
+ assert not value, value
+ value = remainder
+ return param, value
+ param.defects.append(errors.InvalidHeaderDefect(
+ "Apparent initial-extended-value but attribute "
+ "was not marked as extended or was not initial section"))
+ if not value:
+ # Assume the charset/lang is missing and the token is the value.
+ param.defects.append(errors.InvalidHeaderDefect(
+ "Missing required charset/lang delimiters"))
+ appendto.append(token)
+ if remainder is None:
+ return param, value
+ else:
+ if token is not None:
+ for t in token:
+ if t.token_type == 'extended-attrtext':
+ break
+ t.token_type == 'attrtext'
+ appendto.append(t)
+ param.charset = t.value
+ if value[0] != "'":
+ raise errors.HeaderParseError("Expected RFC2231 char/lang encoding "
+ "delimiter, but found {!r}".format(value))
+ appendto.append(ValueTerminal("'", 'RFC2231-delimiter'))
+ value = value[1:]
+ if value and value[0] != "'":
+ token, value = get_attrtext(value)
+ appendto.append(token)
+ param.lang = token.value
+ if not value or value[0] != "'":
+ raise errors.HeaderParseError("Expected RFC2231 char/lang encoding "
+ "delimiter, but found {}".format(value))
+ appendto.append(ValueTerminal("'", 'RFC2231-delimiter'))
+ value = value[1:]
+ if remainder is not None:
+ # Treat the rest of value as bare quoted string content.
+ v = Value()
+ while value:
+ if value[0] in WSP:
+ token, value = get_fws(value)
+ elif value[0] == '"':
+ token = ValueTerminal('"', 'DQUOTE')
+ value = value[1:]
+ else:
+ token, value = get_qcontent(value)
+ v.append(token)
+ token = v
+ else:
+ token, value = get_value(value)
+ appendto.append(token)
+ if remainder is not None:
+ assert not value, value
+ value = remainder
+ return param, value
+
+def parse_mime_parameters(value):
+ """ parameter *( ";" parameter )
+
+ That BNF is meant to indicate this routine should only be called after
+ finding and handling the leading ';'. There is no corresponding rule in
+ the formal RFC grammar, but it is more convenient for us for the set of
+ parameters to be treated as its own TokenList.
+
+ This is 'parse' routine because it consumes the remaining value, but it
+ would never be called to parse a full header. Instead it is called to
+ parse everything after the non-parameter value of a specific MIME header.
+
+ """
+ mime_parameters = MimeParameters()
+ while value:
+ try:
+ token, value = get_parameter(value)
+ mime_parameters.append(token)
+ except errors.HeaderParseError as err:
+ leader = None
+ if value[0] in CFWS_LEADER:
+ leader, value = get_cfws(value)
+ if not value:
+ mime_parameters.append(leader)
+ return mime_parameters
+ if value[0] == ';':
+ if leader is not None:
+ mime_parameters.append(leader)
+ mime_parameters.defects.append(errors.InvalidHeaderDefect(
+ "parameter entry with no content"))
+ else:
+ token, value = get_invalid_parameter(value)
+ if leader:
+ token[:0] = [leader]
+ mime_parameters.append(token)
+ mime_parameters.defects.append(errors.InvalidHeaderDefect(
+ "invalid parameter {!r}".format(token)))
+ if value and value[0] != ';':
+ # Junk after the otherwise valid parameter. Mark it as
+ # invalid, but it will have a value.
+ param = mime_parameters[-1]
+ param.token_type = 'invalid-parameter'
+ token, value = get_invalid_parameter(value)
+ param.extend(token)
+ mime_parameters.defects.append(errors.InvalidHeaderDefect(
+ "parameter with invalid trailing text {!r}".format(token)))
+ if value:
+ # Must be a ';' at this point.
+ mime_parameters.append(ValueTerminal(';', 'parameter-separator'))
+ value = value[1:]
+ return mime_parameters
+
+def _find_mime_parameters(tokenlist, value):
+ """Do our best to find the parameters in an invalid MIME header
+
+ """
+ while value and value[0] != ';':
+ if value[0] in PHRASE_ENDS:
+ tokenlist.append(ValueTerminal(value[0], 'misplaced-special'))
+ value = value[1:]
+ else:
+ token, value = get_phrase(value)
+ tokenlist.append(token)
+ if not value:
+ return
+ tokenlist.append(ValueTerminal(';', 'parameter-separator'))
+ tokenlist.append(parse_mime_parameters(value[1:]))
+
+def parse_content_type_header(value):
+ """ maintype "/" subtype *( ";" parameter )
+
+ The maintype and substype are tokens. Theoretically they could
+ be checked against the official IANA list + x-token, but we
+ don't do that.
+ """
+ ctype = ContentType()
+ recover = False
+ if not value:
+ ctype.defects.append(errors.HeaderMissingRequiredValue(
+ "Missing content type specification"))
+ return ctype
+ try:
+ token, value = get_token(value)
+ except errors.HeaderParseError:
+ ctype.defects.append(errors.InvalidHeaderDefect(
+ "Expected content maintype but found {!r}".format(value)))
+ _find_mime_parameters(ctype, value)
+ return ctype
+ ctype.append(token)
+ # XXX: If we really want to follow the formal grammar we should make
+ # mantype and subtype specialized TokenLists here. Probably not worth it.
+ if not value or value[0] != '/':
+ ctype.defects.append(errors.InvalidHeaderDefect(
+ "Invalid content type"))
+ if value:
+ _find_mime_parameters(ctype, value)
+ return ctype
+ ctype.maintype = token.value.strip().lower()
+ ctype.append(ValueTerminal('/', 'content-type-separator'))
+ value = value[1:]
+ try:
+ token, value = get_token(value)
+ except errors.HeaderParseError:
+ ctype.defects.append(errors.InvalidHeaderDefect(
+ "Expected content subtype but found {!r}".format(value)))
+ _find_mime_parameters(ctype, value)
+ return ctype
+ ctype.append(token)
+ ctype.subtype = token.value.strip().lower()
+ if not value:
+ return ctype
+ if value[0] != ';':
+ ctype.defects.append(errors.InvalidHeaderDefect(
+ "Only parameters are valid after content type, but "
+ "found {!r}".format(value)))
+ # The RFC requires that a syntactically invalid content-type be treated
+ # as text/plain. Perhaps we should postel this, but we should probably
+ # only do that if we were checking the subtype value against IANA.
+ del ctype.maintype, ctype.subtype
+ _find_mime_parameters(ctype, value)
+ return ctype
+ ctype.append(ValueTerminal(';', 'parameter-separator'))
+ ctype.append(parse_mime_parameters(value[1:]))
+ return ctype
+
+def parse_content_disposition_header(value):
+ """ disposition-type *( ";" parameter )
+
+ """
+ disp_header = ContentDisposition()
+ if not value:
+ disp_header.defects.append(errors.HeaderMissingRequiredValue(
+ "Missing content disposition"))
+ return disp_header
+ try:
+ token, value = get_token(value)
+ except errors.HeaderParseError:
+ disp_header.defects.append(errors.InvalidHeaderDefect(
+ "Expected content disposition but found {!r}".format(value)))
+ _find_mime_parameters(disp_header, value)
+ return disp_header
+ disp_header.append(token)
+ disp_header.content_disposition = token.value.strip().lower()
+ if not value:
+ return disp_header
+ if value[0] != ';':
+ disp_header.defects.append(errors.InvalidHeaderDefect(
+ "Only parameters are valid after content disposition, but "
+ "found {!r}".format(value)))
+ _find_mime_parameters(disp_header, value)
+ return disp_header
+ disp_header.append(ValueTerminal(';', 'parameter-separator'))
+ disp_header.append(parse_mime_parameters(value[1:]))
+ return disp_header
+
+def parse_content_transfer_encoding_header(value):
+ """ mechanism
+
+ """
+ # We should probably validate the values, since the list is fixed.
+ cte_header = ContentTransferEncoding()
+ if not value:
+ cte_header.defects.append(errors.HeaderMissingRequiredValue(
+ "Missing content transfer encoding"))
+ return cte_header
+ try:
+ token, value = get_token(value)
+ except errors.HeaderParseError:
+ cte_header.defects.append(errors.InvalidHeaderDefect(
+ "Expected content transfer encoding but found {!r}".format(value)))
+ else:
+ cte_header.append(token)
+ cte_header.cte = token.value.strip().lower()
+ if not value:
+ return cte_header
+ while value:
+ cte_header.defects.append(errors.InvalidHeaderDefect(
+ "Extra text after content transfer encoding"))
+ if value[0] in PHRASE_ENDS:
+ cte_header.append(ValueTerminal(value[0], 'misplaced-special'))
+ value = value[1:]
+ else:
+ token, value = get_phrase(value)
+ cte_header.append(token)
+ return cte_header
+
+
+#
+# Header folding
+#
+# Header folding is complex, with lots of rules and corner cases. The
+# following code does its best to obey the rules and handle the corner
+# cases, but you can be sure there are few bugs:)
+#
+# This folder generally canonicalizes as it goes, preferring the stringified
+# version of each token. The tokens contain information that supports the
+# folder, including which tokens can be encoded in which ways.
+#
+# Folded text is accumulated in a simple list of strings ('lines'), each
+# one of which should be less than policy.max_line_length ('maxlen').
+#
+
+def _steal_trailing_WSP_if_exists(lines):
+ wsp = ''
+ if lines and lines[-1] and lines[-1][-1] in WSP:
+ wsp = lines[-1][-1]
+ lines[-1] = lines[-1][:-1]
+ return wsp
+
+def _refold_parse_tree(parse_tree, *, policy):
+ """Return string of contents of parse_tree folded according to RFC rules.
+
+ """
+ # max_line_length 0/None means no limit, ie: infinitely long.
+ maxlen = policy.max_line_length or sys.maxsize
+ encoding = 'utf-8' if policy.utf8 else 'us-ascii'
+ lines = ['']
+ last_ew = None
+ wrap_as_ew_blocked = 0
+ want_encoding = False
+ end_ew_not_allowed = Terminal('', 'wrap_as_ew_blocked')
+ parts = list(parse_tree)
+ while parts:
+ part = parts.pop(0)
+ if part is end_ew_not_allowed:
+ wrap_as_ew_blocked -= 1
+ continue
+ tstr = str(part)
+ if part.token_type == 'ptext' and set(tstr) & SPECIALS:
+ # Encode if tstr contains special characters.
+ want_encoding = True
+ try:
+ tstr.encode(encoding)
+ charset = encoding
+ except UnicodeEncodeError:
+ if any(isinstance(x, errors.UndecodableBytesDefect)
+ for x in part.all_defects):
+ charset = 'unknown-8bit'
+ else:
+ # If policy.utf8 is false this should really be taken from a
+ # 'charset' property on the policy.
+ charset = 'utf-8'
+ want_encoding = True
+ if part.token_type == 'mime-parameters':
+ # Mime parameter folding (using RFC2231) is extra special.
+ _fold_mime_parameters(part, lines, maxlen, encoding)
+ continue
+ if want_encoding and not wrap_as_ew_blocked:
+ if not part.as_ew_allowed:
+ want_encoding = False
+ last_ew = None
+ if part.syntactic_break:
+ encoded_part = part.fold(policy=policy)[:-len(policy.linesep)]
+ if policy.linesep not in encoded_part:
+ # It fits on a single line
+ if len(encoded_part) > maxlen - len(lines[-1]):
+ # But not on this one, so start a new one.
+ newline = _steal_trailing_WSP_if_exists(lines)
+ # XXX what if encoded_part has no leading FWS?
+ lines.append(newline)
+ lines[-1] += encoded_part
+ continue
+ # Either this is not a major syntactic break, so we don't
+ # want it on a line by itself even if it fits, or it
+ # doesn't fit on a line by itself. Either way, fall through
+ # to unpacking the subparts and wrapping them.
+ if not hasattr(part, 'encode'):
+ # It's not a Terminal, do each piece individually.
+ parts = list(part) + parts
+ else:
+ # It's a terminal, wrap it as an encoded word, possibly
+ # combining it with previously encoded words if allowed.
+ last_ew = _fold_as_ew(tstr, lines, maxlen, last_ew,
+ part.ew_combine_allowed, charset)
+ want_encoding = False
+ continue
+ if len(tstr) <= maxlen - len(lines[-1]):
+ lines[-1] += tstr
+ continue
+ # This part is too long to fit. The RFC wants us to break at
+ # "major syntactic breaks", so unless we don't consider this
+ # to be one, check if it will fit on the next line by itself.
+ if (part.syntactic_break and
+ len(tstr) + 1 <= maxlen):
+ newline = _steal_trailing_WSP_if_exists(lines)
+ if newline or part.startswith_fws():
+ lines.append(newline + tstr)
+ last_ew = None
+ continue
+ if not hasattr(part, 'encode'):
+ # It's not a terminal, try folding the subparts.
+ newparts = list(part)
+ if not part.as_ew_allowed:
+ wrap_as_ew_blocked += 1
+ newparts.append(end_ew_not_allowed)
+ parts = newparts + parts
+ continue
+ if part.as_ew_allowed and not wrap_as_ew_blocked:
+ # It doesn't need CTE encoding, but encode it anyway so we can
+ # wrap it.
+ parts.insert(0, part)
+ want_encoding = True
+ continue
+ # We can't figure out how to wrap, it, so give up.
+ newline = _steal_trailing_WSP_if_exists(lines)
+ if newline or part.startswith_fws():
+ lines.append(newline + tstr)
+ else:
+ # We can't fold it onto the next line either...
+ lines[-1] += tstr
+ return policy.linesep.join(lines) + policy.linesep
+
+def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset):
+ """Fold string to_encode into lines as encoded word, combining if allowed.
+ Return the new value for last_ew, or None if ew_combine_allowed is False.
+
+ If there is already an encoded word in the last line of lines (indicated by
+ a non-None value for last_ew) and ew_combine_allowed is true, decode the
+ existing ew, combine it with to_encode, and re-encode. Otherwise, encode
+ to_encode. In either case, split to_encode as necessary so that the
+ encoded segments fit within maxlen.
+
+ """
+ if last_ew is not None and ew_combine_allowed:
+ to_encode = str(
+ get_unstructured(lines[-1][last_ew:] + to_encode))
+ lines[-1] = lines[-1][:last_ew]
+ if to_encode[0] in WSP:
+ # We're joining this to non-encoded text, so don't encode
+ # the leading blank.
+ leading_wsp = to_encode[0]
+ to_encode = to_encode[1:]
+ if (len(lines[-1]) == maxlen):
+ lines.append(_steal_trailing_WSP_if_exists(lines))
+ lines[-1] += leading_wsp
+ trailing_wsp = ''
+ if to_encode[-1] in WSP:
+ # Likewise for the trailing space.
+ trailing_wsp = to_encode[-1]
+ to_encode = to_encode[:-1]
+ new_last_ew = len(lines[-1]) if last_ew is None else last_ew
+
+ encode_as = 'utf-8' if charset == 'us-ascii' else charset
+
+ # The RFC2047 chrome takes up 7 characters plus the length
+ # of the charset name.
+ chrome_len = len(encode_as) + 7
+
+ if (chrome_len + 1) >= maxlen:
+ raise errors.HeaderParseError(
+ "max_line_length is too small to fit an encoded word")
+
+ while to_encode:
+ remaining_space = maxlen - len(lines[-1])
+ text_space = remaining_space - chrome_len
+ if text_space <= 0:
+ lines.append(' ')
+ continue
+
+ to_encode_word = to_encode[:text_space]
+ encoded_word = _ew.encode(to_encode_word, charset=encode_as)
+ excess = len(encoded_word) - remaining_space
+ while excess > 0:
+ # Since the chunk to encode is guaranteed to fit into less than 100 characters,
+ # shrinking it by one at a time shouldn't take long.
+ to_encode_word = to_encode_word[:-1]
+ encoded_word = _ew.encode(to_encode_word, charset=encode_as)
+ excess = len(encoded_word) - remaining_space
+ lines[-1] += encoded_word
+ to_encode = to_encode[len(to_encode_word):]
+
+ if to_encode:
+ lines.append(' ')
+ new_last_ew = len(lines[-1])
+ lines[-1] += trailing_wsp
+ return new_last_ew if ew_combine_allowed else None
+
+def _fold_mime_parameters(part, lines, maxlen, encoding):
+ """Fold TokenList 'part' into the 'lines' list as mime parameters.
+
+ Using the decoded list of parameters and values, format them according to
+ the RFC rules, including using RFC2231 encoding if the value cannot be
+ expressed in 'encoding' and/or the parameter+value is too long to fit
+ within 'maxlen'.
+
+ """
+ # Special case for RFC2231 encoding: start from decoded values and use
+ # RFC2231 encoding iff needed.
+ #
+ # Note that the 1 and 2s being added to the length calculations are
+ # accounting for the possibly-needed spaces and semicolons we'll be adding.
+ #
+ for name, value in part.params:
+ # XXX What if this ';' puts us over maxlen the first time through the
+ # loop? We should split the header value onto a newline in that case,
+ # but to do that we need to recognize the need earlier or reparse the
+ # header, so I'm going to ignore that bug for now. It'll only put us
+ # one character over.
+ if not lines[-1].rstrip().endswith(';'):
+ lines[-1] += ';'
+ charset = encoding
+ error_handler = 'strict'
+ try:
+ value.encode(encoding)
+ encoding_required = False
+ except UnicodeEncodeError:
+ encoding_required = True
+ if utils._has_surrogates(value):
+ charset = 'unknown-8bit'
+ error_handler = 'surrogateescape'
+ else:
+ charset = 'utf-8'
+ if encoding_required:
+ encoded_value = urllib.parse.quote(
+ value, safe='', errors=error_handler)
+ tstr = "{}*={}''{}".format(name, charset, encoded_value)
+ else:
+ tstr = '{}={}'.format(name, quote_string(value))
+ if len(lines[-1]) + len(tstr) + 1 < maxlen:
+ lines[-1] = lines[-1] + ' ' + tstr
+ continue
+ elif len(tstr) + 2 <= maxlen:
+ lines.append(' ' + tstr)
+ continue
+ # We need multiple sections. We are allowed to mix encoded and
+ # non-encoded sections, but we aren't going to. We'll encode them all.
+ section = 0
+ extra_chrome = charset + "''"
+ while value:
+ chrome_len = len(name) + len(str(section)) + 3 + len(extra_chrome)
+ if maxlen <= chrome_len + 3:
+ # We need room for the leading blank, the trailing semicolon,
+ # and at least one character of the value. If we don't
+ # have that, we'd be stuck, so in that case fall back to
+ # the RFC standard width.
+ maxlen = 78
+ splitpoint = maxchars = maxlen - chrome_len - 2
+ while True:
+ partial = value[:splitpoint]
+ encoded_value = urllib.parse.quote(
+ partial, safe='', errors=error_handler)
+ if len(encoded_value) <= maxchars:
+ break
+ splitpoint -= 1
+ lines.append(" {}*{}*={}{}".format(
+ name, section, extra_chrome, encoded_value))
+ extra_chrome = ''
+ section += 1
+ value = value[splitpoint:]
+ if value:
+ lines[-1] += ';'
diff --git a/dist/lib/email/_parseaddr.py b/dist/lib/email/_parseaddr.py
new file mode 100644
index 0000000..41ff6f8
--- /dev/null
+++ b/dist/lib/email/_parseaddr.py
@@ -0,0 +1,549 @@
+# Copyright (C) 2002-2007 Python Software Foundation
+# Contact: email-sig@python.org
+
+"""Email address parsing code.
+
+Lifted directly from rfc822.py. This should eventually be rewritten.
+"""
+
+__all__ = [
+ 'mktime_tz',
+ 'parsedate',
+ 'parsedate_tz',
+ 'quote',
+ ]
+
+import time, calendar
+
+SPACE = ' '
+EMPTYSTRING = ''
+COMMASPACE = ', '
+
+# Parse a date field
+_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
+ 'aug', 'sep', 'oct', 'nov', 'dec',
+ 'january', 'february', 'march', 'april', 'may', 'june', 'july',
+ 'august', 'september', 'october', 'november', 'december']
+
+_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
+
+# The timezone table does not include the military time zones defined
+# in RFC822, other than Z. According to RFC1123, the description in
+# RFC822 gets the signs wrong, so we can't rely on any such time
+# zones. RFC1123 recommends that numeric timezone indicators be used
+# instead of timezone names.
+
+_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
+ 'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
+ 'EST': -500, 'EDT': -400, # Eastern
+ 'CST': -600, 'CDT': -500, # Central
+ 'MST': -700, 'MDT': -600, # Mountain
+ 'PST': -800, 'PDT': -700 # Pacific
+ }
+
+
+def parsedate_tz(data):
+ """Convert a date string to a time tuple.
+
+ Accounts for military timezones.
+ """
+ res = _parsedate_tz(data)
+ if not res:
+ return
+ if res[9] is None:
+ res[9] = 0
+ return tuple(res)
+
+def _parsedate_tz(data):
+ """Convert date to extended time tuple.
+
+ The last (additional) element is the time zone offset in seconds, except if
+ the timezone was specified as -0000. In that case the last element is
+ None. This indicates a UTC timestamp that explicitly declaims knowledge of
+ the source timezone, as opposed to a +0000 timestamp that indicates the
+ source timezone really was UTC.
+
+ """
+ if not data:
+ return
+ data = data.split()
+ # The FWS after the comma after the day-of-week is optional, so search and
+ # adjust for this.
+ if data[0].endswith(',') or data[0].lower() in _daynames:
+ # There's a dayname here. Skip it
+ del data[0]
+ else:
+ i = data[0].rfind(',')
+ if i >= 0:
+ data[0] = data[0][i+1:]
+ if len(data) == 3: # RFC 850 date, deprecated
+ stuff = data[0].split('-')
+ if len(stuff) == 3:
+ data = stuff + data[1:]
+ if len(data) == 4:
+ s = data[3]
+ i = s.find('+')
+ if i == -1:
+ i = s.find('-')
+ if i > 0:
+ data[3:] = [s[:i], s[i:]]
+ else:
+ data.append('') # Dummy tz
+ if len(data) < 5:
+ return None
+ data = data[:5]
+ [dd, mm, yy, tm, tz] = data
+ mm = mm.lower()
+ if mm not in _monthnames:
+ dd, mm = mm, dd.lower()
+ if mm not in _monthnames:
+ return None
+ mm = _monthnames.index(mm) + 1
+ if mm > 12:
+ mm -= 12
+ if dd[-1] == ',':
+ dd = dd[:-1]
+ i = yy.find(':')
+ if i > 0:
+ yy, tm = tm, yy
+ if yy[-1] == ',':
+ yy = yy[:-1]
+ if not yy[0].isdigit():
+ yy, tz = tz, yy
+ if tm[-1] == ',':
+ tm = tm[:-1]
+ tm = tm.split(':')
+ if len(tm) == 2:
+ [thh, tmm] = tm
+ tss = '0'
+ elif len(tm) == 3:
+ [thh, tmm, tss] = tm
+ elif len(tm) == 1 and '.' in tm[0]:
+ # Some non-compliant MUAs use '.' to separate time elements.
+ tm = tm[0].split('.')
+ if len(tm) == 2:
+ [thh, tmm] = tm
+ tss = 0
+ elif len(tm) == 3:
+ [thh, tmm, tss] = tm
+ else:
+ return None
+ try:
+ yy = int(yy)
+ dd = int(dd)
+ thh = int(thh)
+ tmm = int(tmm)
+ tss = int(tss)
+ except ValueError:
+ return None
+ # Check for a yy specified in two-digit format, then convert it to the
+ # appropriate four-digit format, according to the POSIX standard. RFC 822
+ # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
+ # mandates a 4-digit yy. For more information, see the documentation for
+ # the time module.
+ if yy < 100:
+ # The year is between 1969 and 1999 (inclusive).
+ if yy > 68:
+ yy += 1900
+ # The year is between 2000 and 2068 (inclusive).
+ else:
+ yy += 2000
+ tzoffset = None
+ tz = tz.upper()
+ if tz in _timezones:
+ tzoffset = _timezones[tz]
+ else:
+ try:
+ tzoffset = int(tz)
+ except ValueError:
+ pass
+ if tzoffset==0 and tz.startswith('-'):
+ tzoffset = None
+ # Convert a timezone offset into seconds ; -0500 -> -18000
+ if tzoffset:
+ if tzoffset < 0:
+ tzsign = -1
+ tzoffset = -tzoffset
+ else:
+ tzsign = 1
+ tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
+ # Daylight Saving Time flag is set to -1, since DST is unknown.
+ return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
+
+
+def parsedate(data):
+ """Convert a time string to a time tuple."""
+ t = parsedate_tz(data)
+ if isinstance(t, tuple):
+ return t[:9]
+ else:
+ return t
+
+
+def mktime_tz(data):
+ """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp."""
+ if data[9] is None:
+ # No zone info, so localtime is better assumption than GMT
+ return time.mktime(data[:8] + (-1,))
+ else:
+ t = calendar.timegm(data)
+ return t - data[9]
+
+
+def quote(str):
+ """Prepare string to be used in a quoted string.
+
+ Turns backslash and double quote characters into quoted pairs. These
+ are the only characters that need to be quoted inside a quoted string.
+ Does not add the surrounding double quotes.
+ """
+ return str.replace('\\', '\\\\').replace('"', '\\"')
+
+
+class AddrlistClass:
+ """Address parser class by Ben Escoto.
+
+ To understand what this class does, it helps to have a copy of RFC 2822 in
+ front of you.
+
+ Note: this class interface is deprecated and may be removed in the future.
+ Use email.utils.AddressList instead.
+ """
+
+ def __init__(self, field):
+ """Initialize a new instance.
+
+ `field' is an unparsed address header field, containing
+ one or more addresses.
+ """
+ self.specials = '()<>@,:;.\"[]'
+ self.pos = 0
+ self.LWS = ' \t'
+ self.CR = '\r\n'
+ self.FWS = self.LWS + self.CR
+ self.atomends = self.specials + self.LWS + self.CR
+ # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
+ # is obsolete syntax. RFC 2822 requires that we recognize obsolete
+ # syntax, so allow dots in phrases.
+ self.phraseends = self.atomends.replace('.', '')
+ self.field = field
+ self.commentlist = []
+
+ def gotonext(self):
+ """Skip white space and extract comments."""
+ wslist = []
+ while self.pos < len(self.field):
+ if self.field[self.pos] in self.LWS + '\n\r':
+ if self.field[self.pos] not in '\n\r':
+ wslist.append(self.field[self.pos])
+ self.pos += 1
+ elif self.field[self.pos] == '(':
+ self.commentlist.append(self.getcomment())
+ else:
+ break
+ return EMPTYSTRING.join(wslist)
+
+ def getaddrlist(self):
+ """Parse all addresses.
+
+ Returns a list containing all of the addresses.
+ """
+ result = []
+ while self.pos < len(self.field):
+ ad = self.getaddress()
+ if ad:
+ result += ad
+ else:
+ result.append(('', ''))
+ return result
+
+ def getaddress(self):
+ """Parse the next address."""
+ self.commentlist = []
+ self.gotonext()
+
+ oldpos = self.pos
+ oldcl = self.commentlist
+ plist = self.getphraselist()
+
+ self.gotonext()
+ returnlist = []
+
+ if self.pos >= len(self.field):
+ # Bad email address technically, no domain.
+ if plist:
+ returnlist = [(SPACE.join(self.commentlist), plist[0])]
+
+ elif self.field[self.pos] in '.@':
+ # email address is just an addrspec
+ # this isn't very efficient since we start over
+ self.pos = oldpos
+ self.commentlist = oldcl
+ addrspec = self.getaddrspec()
+ returnlist = [(SPACE.join(self.commentlist), addrspec)]
+
+ elif self.field[self.pos] == ':':
+ # address is a group
+ returnlist = []
+
+ fieldlen = len(self.field)
+ self.pos += 1
+ while self.pos < len(self.field):
+ self.gotonext()
+ if self.pos < fieldlen and self.field[self.pos] == ';':
+ self.pos += 1
+ break
+ returnlist = returnlist + self.getaddress()
+
+ elif self.field[self.pos] == '<':
+ # Address is a phrase then a route addr
+ routeaddr = self.getrouteaddr()
+
+ if self.commentlist:
+ returnlist = [(SPACE.join(plist) + ' (' +
+ ' '.join(self.commentlist) + ')', routeaddr)]
+ else:
+ returnlist = [(SPACE.join(plist), routeaddr)]
+
+ else:
+ if plist:
+ returnlist = [(SPACE.join(self.commentlist), plist[0])]
+ elif self.field[self.pos] in self.specials:
+ self.pos += 1
+
+ self.gotonext()
+ if self.pos < len(self.field) and self.field[self.pos] == ',':
+ self.pos += 1
+ return returnlist
+
+ def getrouteaddr(self):
+ """Parse a route address (Return-path value).
+
+ This method just skips all the route stuff and returns the addrspec.
+ """
+ if self.field[self.pos] != '<':
+ return
+
+ expectroute = False
+ self.pos += 1
+ self.gotonext()
+ adlist = ''
+ while self.pos < len(self.field):
+ if expectroute:
+ self.getdomain()
+ expectroute = False
+ elif self.field[self.pos] == '>':
+ self.pos += 1
+ break
+ elif self.field[self.pos] == '@':
+ self.pos += 1
+ expectroute = True
+ elif self.field[self.pos] == ':':
+ self.pos += 1
+ else:
+ adlist = self.getaddrspec()
+ self.pos += 1
+ break
+ self.gotonext()
+
+ return adlist
+
+ def getaddrspec(self):
+ """Parse an RFC 2822 addr-spec."""
+ aslist = []
+
+ self.gotonext()
+ while self.pos < len(self.field):
+ preserve_ws = True
+ if self.field[self.pos] == '.':
+ if aslist and not aslist[-1].strip():
+ aslist.pop()
+ aslist.append('.')
+ self.pos += 1
+ preserve_ws = False
+ elif self.field[self.pos] == '"':
+ aslist.append('"%s"' % quote(self.getquote()))
+ elif self.field[self.pos] in self.atomends:
+ if aslist and not aslist[-1].strip():
+ aslist.pop()
+ break
+ else:
+ aslist.append(self.getatom())
+ ws = self.gotonext()
+ if preserve_ws and ws:
+ aslist.append(ws)
+
+ if self.pos >= len(self.field) or self.field[self.pos] != '@':
+ return EMPTYSTRING.join(aslist)
+
+ aslist.append('@')
+ self.pos += 1
+ self.gotonext()
+ domain = self.getdomain()
+ if not domain:
+ # Invalid domain, return an empty address instead of returning a
+ # local part to denote failed parsing.
+ return EMPTYSTRING
+ return EMPTYSTRING.join(aslist) + domain
+
+ def getdomain(self):
+ """Get the complete domain name from an address."""
+ sdlist = []
+ while self.pos < len(self.field):
+ if self.field[self.pos] in self.LWS:
+ self.pos += 1
+ elif self.field[self.pos] == '(':
+ self.commentlist.append(self.getcomment())
+ elif self.field[self.pos] == '[':
+ sdlist.append(self.getdomainliteral())
+ elif self.field[self.pos] == '.':
+ self.pos += 1
+ sdlist.append('.')
+ elif self.field[self.pos] == '@':
+ # bpo-34155: Don't parse domains with two `@` like
+ # `a@malicious.org@important.com`.
+ return EMPTYSTRING
+ elif self.field[self.pos] in self.atomends:
+ break
+ else:
+ sdlist.append(self.getatom())
+ return EMPTYSTRING.join(sdlist)
+
+ def getdelimited(self, beginchar, endchars, allowcomments=True):
+ """Parse a header fragment delimited by special characters.
+
+ `beginchar' is the start character for the fragment.
+ If self is not looking at an instance of `beginchar' then
+ getdelimited returns the empty string.
+
+ `endchars' is a sequence of allowable end-delimiting characters.
+ Parsing stops when one of these is encountered.
+
+ If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
+ within the parsed fragment.
+ """
+ if self.field[self.pos] != beginchar:
+ return ''
+
+ slist = ['']
+ quote = False
+ self.pos += 1
+ while self.pos < len(self.field):
+ if quote:
+ slist.append(self.field[self.pos])
+ quote = False
+ elif self.field[self.pos] in endchars:
+ self.pos += 1
+ break
+ elif allowcomments and self.field[self.pos] == '(':
+ slist.append(self.getcomment())
+ continue # have already advanced pos from getcomment
+ elif self.field[self.pos] == '\\':
+ quote = True
+ else:
+ slist.append(self.field[self.pos])
+ self.pos += 1
+
+ return EMPTYSTRING.join(slist)
+
+ def getquote(self):
+ """Get a quote-delimited fragment from self's field."""
+ return self.getdelimited('"', '"\r', False)
+
+ def getcomment(self):
+ """Get a parenthesis-delimited fragment from self's field."""
+ return self.getdelimited('(', ')\r', True)
+
+ def getdomainliteral(self):
+ """Parse an RFC 2822 domain-literal."""
+ return '[%s]' % self.getdelimited('[', ']\r', False)
+
+ def getatom(self, atomends=None):
+ """Parse an RFC 2822 atom.
+
+ Optional atomends specifies a different set of end token delimiters
+ (the default is to use self.atomends). This is used e.g. in
+ getphraselist() since phrase endings must not include the `.' (which
+ is legal in phrases)."""
+ atomlist = ['']
+ if atomends is None:
+ atomends = self.atomends
+
+ while self.pos < len(self.field):
+ if self.field[self.pos] in atomends:
+ break
+ else:
+ atomlist.append(self.field[self.pos])
+ self.pos += 1
+
+ return EMPTYSTRING.join(atomlist)
+
+ def getphraselist(self):
+ """Parse a sequence of RFC 2822 phrases.
+
+ A phrase is a sequence of words, which are in turn either RFC 2822
+ atoms or quoted-strings. Phrases are canonicalized by squeezing all
+ runs of continuous whitespace into one space.
+ """
+ plist = []
+
+ while self.pos < len(self.field):
+ if self.field[self.pos] in self.FWS:
+ self.pos += 1
+ elif self.field[self.pos] == '"':
+ plist.append(self.getquote())
+ elif self.field[self.pos] == '(':
+ self.commentlist.append(self.getcomment())
+ elif self.field[self.pos] in self.phraseends:
+ break
+ else:
+ plist.append(self.getatom(self.phraseends))
+
+ return plist
+
+class AddressList(AddrlistClass):
+ """An AddressList encapsulates a list of parsed RFC 2822 addresses."""
+ def __init__(self, field):
+ AddrlistClass.__init__(self, field)
+ if field:
+ self.addresslist = self.getaddrlist()
+ else:
+ self.addresslist = []
+
+ def __len__(self):
+ return len(self.addresslist)
+
+ def __add__(self, other):
+ # Set union
+ newaddr = AddressList(None)
+ newaddr.addresslist = self.addresslist[:]
+ for x in other.addresslist:
+ if not x in self.addresslist:
+ newaddr.addresslist.append(x)
+ return newaddr
+
+ def __iadd__(self, other):
+ # Set union, in-place
+ for x in other.addresslist:
+ if not x in self.addresslist:
+ self.addresslist.append(x)
+ return self
+
+ def __sub__(self, other):
+ # Set difference
+ newaddr = AddressList(None)
+ for x in self.addresslist:
+ if not x in other.addresslist:
+ newaddr.addresslist.append(x)
+ return newaddr
+
+ def __isub__(self, other):
+ # Set difference, in-place
+ for x in other.addresslist:
+ if x in self.addresslist:
+ self.addresslist.remove(x)
+ return self
+
+ def __getitem__(self, index):
+ # Make indexing, slices, and 'in' work
+ return self.addresslist[index]
diff --git a/dist/lib/email/_policybase.py b/dist/lib/email/_policybase.py
new file mode 100644
index 0000000..c9cbadd
--- /dev/null
+++ b/dist/lib/email/_policybase.py
@@ -0,0 +1,374 @@
+"""Policy framework for the email package.
+
+Allows fine grained feature control of how the package parses and emits data.
+"""
+
+import abc
+from email import header
+from email import charset as _charset
+from email.utils import _has_surrogates
+
+__all__ = [
+ 'Policy',
+ 'Compat32',
+ 'compat32',
+ ]
+
+
+class _PolicyBase:
+
+ """Policy Object basic framework.
+
+ This class is useless unless subclassed. A subclass should define
+ class attributes with defaults for any values that are to be
+ managed by the Policy object. The constructor will then allow
+ non-default values to be set for these attributes at instance
+ creation time. The instance will be callable, taking these same
+ attributes keyword arguments, and returning a new instance
+ identical to the called instance except for those values changed
+ by the keyword arguments. Instances may be added, yielding new
+ instances with any non-default values from the right hand
+ operand overriding those in the left hand operand. That is,
+
+ A + B == A()
+
+ The repr of an instance can be used to reconstruct the object
+ if and only if the repr of the values can be used to reconstruct
+ those values.
+
+ """
+
+ def __init__(self, **kw):
+ """Create new Policy, possibly overriding some defaults.
+
+ See class docstring for a list of overridable attributes.
+
+ """
+ for name, value in kw.items():
+ if hasattr(self, name):
+ super(_PolicyBase,self).__setattr__(name, value)
+ else:
+ raise TypeError(
+ "{!r} is an invalid keyword argument for {}".format(
+ name, self.__class__.__name__))
+
+ def __repr__(self):
+ args = [ "{}={!r}".format(name, value)
+ for name, value in self.__dict__.items() ]
+ return "{}({})".format(self.__class__.__name__, ', '.join(args))
+
+ def clone(self, **kw):
+ """Return a new instance with specified attributes changed.
+
+ The new instance has the same attribute values as the current object,
+ except for the changes passed in as keyword arguments.
+
+ """
+ newpolicy = self.__class__.__new__(self.__class__)
+ for attr, value in self.__dict__.items():
+ object.__setattr__(newpolicy, attr, value)
+ for attr, value in kw.items():
+ if not hasattr(self, attr):
+ raise TypeError(
+ "{!r} is an invalid keyword argument for {}".format(
+ attr, self.__class__.__name__))
+ object.__setattr__(newpolicy, attr, value)
+ return newpolicy
+
+ def __setattr__(self, name, value):
+ if hasattr(self, name):
+ msg = "{!r} object attribute {!r} is read-only"
+ else:
+ msg = "{!r} object has no attribute {!r}"
+ raise AttributeError(msg.format(self.__class__.__name__, name))
+
+ def __add__(self, other):
+ """Non-default values from right operand override those from left.
+
+ The object returned is a new instance of the subclass.
+
+ """
+ return self.clone(**other.__dict__)
+
+
+def _append_doc(doc, added_doc):
+ doc = doc.rsplit('\n', 1)[0]
+ added_doc = added_doc.split('\n', 1)[1]
+ return doc + '\n' + added_doc
+
+def _extend_docstrings(cls):
+ if cls.__doc__ and cls.__doc__.startswith('+'):
+ cls.__doc__ = _append_doc(cls.__bases__[0].__doc__, cls.__doc__)
+ for name, attr in cls.__dict__.items():
+ if attr.__doc__ and attr.__doc__.startswith('+'):
+ for c in (c for base in cls.__bases__ for c in base.mro()):
+ doc = getattr(getattr(c, name), '__doc__')
+ if doc:
+ attr.__doc__ = _append_doc(doc, attr.__doc__)
+ break
+ return cls
+
+
+class Policy(_PolicyBase, metaclass=abc.ABCMeta):
+
+ r"""Controls for how messages are interpreted and formatted.
+
+ Most of the classes and many of the methods in the email package accept
+ Policy objects as parameters. A Policy object contains a set of values and
+ functions that control how input is interpreted and how output is rendered.
+ For example, the parameter 'raise_on_defect' controls whether or not an RFC
+ violation results in an error being raised or not, while 'max_line_length'
+ controls the maximum length of output lines when a Message is serialized.
+
+ Any valid attribute may be overridden when a Policy is created by passing
+ it as a keyword argument to the constructor. Policy objects are immutable,
+ but a new Policy object can be created with only certain values changed by
+ calling the Policy instance with keyword arguments. Policy objects can
+ also be added, producing a new Policy object in which the non-default
+ attributes set in the right hand operand overwrite those specified in the
+ left operand.
+
+ Settable attributes:
+
+ raise_on_defect -- If true, then defects should be raised as errors.
+ Default: False.
+
+ linesep -- string containing the value to use as separation
+ between output lines. Default '\n'.
+
+ cte_type -- Type of allowed content transfer encodings
+
+ 7bit -- ASCII only
+ 8bit -- Content-Transfer-Encoding: 8bit is allowed
+
+ Default: 8bit. Also controls the disposition of
+ (RFC invalid) binary data in headers; see the
+ documentation of the binary_fold method.
+
+ max_line_length -- maximum length of lines, excluding 'linesep',
+ during serialization. None or 0 means no line
+ wrapping is done. Default is 78.
+
+ mangle_from_ -- a flag that, when True escapes From_ lines in the
+ body of the message by putting a `>' in front of
+ them. This is used when the message is being
+ serialized by a generator. Default: True.
+
+ message_factory -- the class to use to create new message objects.
+ If the value is None, the default is Message.
+
+ """
+
+ raise_on_defect = False
+ linesep = '\n'
+ cte_type = '8bit'
+ max_line_length = 78
+ mangle_from_ = False
+ message_factory = None
+
+ def handle_defect(self, obj, defect):
+ """Based on policy, either raise defect or call register_defect.
+
+ handle_defect(obj, defect)
+
+ defect should be a Defect subclass, but in any case must be an
+ Exception subclass. obj is the object on which the defect should be
+ registered if it is not raised. If the raise_on_defect is True, the
+ defect is raised as an error, otherwise the object and the defect are
+ passed to register_defect.
+
+ This method is intended to be called by parsers that discover defects.
+ The email package parsers always call it with Defect instances.
+
+ """
+ if self.raise_on_defect:
+ raise defect
+ self.register_defect(obj, defect)
+
+ def register_defect(self, obj, defect):
+ """Record 'defect' on 'obj'.
+
+ Called by handle_defect if raise_on_defect is False. This method is
+ part of the Policy API so that Policy subclasses can implement custom
+ defect handling. The default implementation calls the append method of
+ the defects attribute of obj. The objects used by the email package by
+ default that get passed to this method will always have a defects
+ attribute with an append method.
+
+ """
+ obj.defects.append(defect)
+
+ def header_max_count(self, name):
+ """Return the maximum allowed number of headers named 'name'.
+
+ Called when a header is added to a Message object. If the returned
+ value is not 0 or None, and there are already a number of headers with
+ the name 'name' equal to the value returned, a ValueError is raised.
+
+ Because the default behavior of Message's __setitem__ is to append the
+ value to the list of headers, it is easy to create duplicate headers
+ without realizing it. This method allows certain headers to be limited
+ in the number of instances of that header that may be added to a
+ Message programmatically. (The limit is not observed by the parser,
+ which will faithfully produce as many headers as exist in the message
+ being parsed.)
+
+ The default implementation returns None for all header names.
+ """
+ return None
+
+ @abc.abstractmethod
+ def header_source_parse(self, sourcelines):
+ """Given a list of linesep terminated strings constituting the lines of
+ a single header, return the (name, value) tuple that should be stored
+ in the model. The input lines should retain their terminating linesep
+ characters. The lines passed in by the email package may contain
+ surrogateescaped binary data.
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def header_store_parse(self, name, value):
+ """Given the header name and the value provided by the application
+ program, return the (name, value) that should be stored in the model.
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def header_fetch_parse(self, name, value):
+ """Given the header name and the value from the model, return the value
+ to be returned to the application program that is requesting that
+ header. The value passed in by the email package may contain
+ surrogateescaped binary data if the lines were parsed by a BytesParser.
+ The returned value should not contain any surrogateescaped data.
+
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def fold(self, name, value):
+ """Given the header name and the value from the model, return a string
+ containing linesep characters that implement the folding of the header
+ according to the policy controls. The value passed in by the email
+ package may contain surrogateescaped binary data if the lines were
+ parsed by a BytesParser. The returned value should not contain any
+ surrogateescaped data.
+
+ """
+ raise NotImplementedError
+
+ @abc.abstractmethod
+ def fold_binary(self, name, value):
+ """Given the header name and the value from the model, return binary
+ data containing linesep characters that implement the folding of the
+ header according to the policy controls. The value passed in by the
+ email package may contain surrogateescaped binary data.
+
+ """
+ raise NotImplementedError
+
+
+@_extend_docstrings
+class Compat32(Policy):
+
+ """+
+ This particular policy is the backward compatibility Policy. It
+ replicates the behavior of the email package version 5.1.
+ """
+
+ mangle_from_ = True
+
+ def _sanitize_header(self, name, value):
+ # If the header value contains surrogates, return a Header using
+ # the unknown-8bit charset to encode the bytes as encoded words.
+ if not isinstance(value, str):
+ # Assume it is already a header object
+ return value
+ if _has_surrogates(value):
+ return header.Header(value, charset=_charset.UNKNOWN8BIT,
+ header_name=name)
+ else:
+ return value
+
+ def header_source_parse(self, sourcelines):
+ """+
+ The name is parsed as everything up to the ':' and returned unmodified.
+ The value is determined by stripping leading whitespace off the
+ remainder of the first line, joining all subsequent lines together, and
+ stripping any trailing carriage return or linefeed characters.
+
+ """
+ name, value = sourcelines[0].split(':', 1)
+ value = value.lstrip(' \t') + ''.join(sourcelines[1:])
+ return (name, value.rstrip('\r\n'))
+
+ def header_store_parse(self, name, value):
+ """+
+ The name and value are returned unmodified.
+ """
+ return (name, value)
+
+ def header_fetch_parse(self, name, value):
+ """+
+ If the value contains binary data, it is converted into a Header object
+ using the unknown-8bit charset. Otherwise it is returned unmodified.
+ """
+ return self._sanitize_header(name, value)
+
+ def fold(self, name, value):
+ """+
+ Headers are folded using the Header folding algorithm, which preserves
+ existing line breaks in the value, and wraps each resulting line to the
+ max_line_length. Non-ASCII binary data are CTE encoded using the
+ unknown-8bit charset.
+
+ """
+ return self._fold(name, value, sanitize=True)
+
+ def fold_binary(self, name, value):
+ """+
+ Headers are folded using the Header folding algorithm, which preserves
+ existing line breaks in the value, and wraps each resulting line to the
+ max_line_length. If cte_type is 7bit, non-ascii binary data is CTE
+ encoded using the unknown-8bit charset. Otherwise the original source
+ header is used, with its existing line breaks and/or binary data.
+
+ """
+ folded = self._fold(name, value, sanitize=self.cte_type=='7bit')
+ return folded.encode('ascii', 'surrogateescape')
+
+ def _fold(self, name, value, sanitize):
+ parts = []
+ parts.append('%s: ' % name)
+ if isinstance(value, str):
+ if _has_surrogates(value):
+ if sanitize:
+ h = header.Header(value,
+ charset=_charset.UNKNOWN8BIT,
+ header_name=name)
+ else:
+ # If we have raw 8bit data in a byte string, we have no idea
+ # what the encoding is. There is no safe way to split this
+ # string. If it's ascii-subset, then we could do a normal
+ # ascii split, but if it's multibyte then we could break the
+ # string. There's no way to know so the least harm seems to
+ # be to not split the string and risk it being too long.
+ parts.append(value)
+ h = None
+ else:
+ h = header.Header(value, header_name=name)
+ else:
+ # Assume it is a Header-like object.
+ h = value
+ if h is not None:
+ # The Header class interprets a value of None for maxlinelen as the
+ # default value of 78, as recommended by RFC 2822.
+ maxlinelen = 0
+ if self.max_line_length is not None:
+ maxlinelen = self.max_line_length
+ parts.append(h.encode(linesep=self.linesep, maxlinelen=maxlinelen))
+ parts.append(self.linesep)
+ return ''.join(parts)
+
+
+compat32 = Compat32()
diff --git a/dist/lib/email/base64mime.py b/dist/lib/email/base64mime.py
new file mode 100644
index 0000000..17f0818
--- /dev/null
+++ b/dist/lib/email/base64mime.py
@@ -0,0 +1,119 @@
+# Copyright (C) 2002-2007 Python Software Foundation
+# Author: Ben Gertzfield
+# Contact: email-sig@python.org
+
+"""Base64 content transfer encoding per RFCs 2045-2047.
+
+This module handles the content transfer encoding method defined in RFC 2045
+to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
+characters encoding known as Base64.
+
+It is used in the MIME standards for email to attach images, audio, and text
+using some 8-bit character sets to messages.
+
+This module provides an interface to encode and decode both headers and bodies
+with Base64 encoding.
+
+RFC 2045 defines a method for including character set information in an
+`encoded-word' in a header. This method is commonly used for 8-bit real names
+in To:, From:, Cc:, etc. fields, as well as Subject: lines.
+
+This module does not do the line wrapping or end-of-line character conversion
+necessary for proper internationalized headers; it only does dumb encoding and
+decoding. To deal with the various line wrapping issues, use the email.header
+module.
+"""
+
+__all__ = [
+ 'body_decode',
+ 'body_encode',
+ 'decode',
+ 'decodestring',
+ 'header_encode',
+ 'header_length',
+ ]
+
+
+from base64 import b64encode
+from binascii import b2a_base64, a2b_base64
+
+CRLF = '\r\n'
+NL = '\n'
+EMPTYSTRING = ''
+
+# See also Charset.py
+MISC_LEN = 7
+
+
+
+# Helpers
+def header_length(bytearray):
+ """Return the length of s when it is encoded with base64."""
+ groups_of_3, leftover = divmod(len(bytearray), 3)
+ # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
+ n = groups_of_3 * 4
+ if leftover:
+ n += 4
+ return n
+
+
+
+def header_encode(header_bytes, charset='iso-8859-1'):
+ """Encode a single header line with Base64 encoding in a given charset.
+
+ charset names the character set to use to encode the header. It defaults
+ to iso-8859-1. Base64 encoding is defined in RFC 2045.
+ """
+ if not header_bytes:
+ return ""
+ if isinstance(header_bytes, str):
+ header_bytes = header_bytes.encode(charset)
+ encoded = b64encode(header_bytes).decode("ascii")
+ return '=?%s?b?%s?=' % (charset, encoded)
+
+
+
+def body_encode(s, maxlinelen=76, eol=NL):
+ r"""Encode a string with base64.
+
+ Each line will be wrapped at, at most, maxlinelen characters (defaults to
+ 76 characters).
+
+ Each line of encoded text will end with eol, which defaults to "\n". Set
+ this to "\r\n" if you will be using the result of this function directly
+ in an email.
+ """
+ if not s:
+ return s
+
+ encvec = []
+ max_unencoded = maxlinelen * 3 // 4
+ for i in range(0, len(s), max_unencoded):
+ # BAW: should encode() inherit b2a_base64()'s dubious behavior in
+ # adding a newline to the encoded string?
+ enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii")
+ if enc.endswith(NL) and eol != NL:
+ enc = enc[:-1] + eol
+ encvec.append(enc)
+ return EMPTYSTRING.join(encvec)
+
+
+
+def decode(string):
+ """Decode a raw base64 string, returning a bytes object.
+
+ This function does not parse a full MIME header value encoded with
+ base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
+ level email.header class for that functionality.
+ """
+ if not string:
+ return bytes()
+ elif isinstance(string, str):
+ return a2b_base64(string.encode('raw-unicode-escape'))
+ else:
+ return a2b_base64(string)
+
+
+# For convenience and backwards compatibility w/ standard base64 module
+body_decode = decode
+decodestring = decode
diff --git a/dist/lib/email/charset.py b/dist/lib/email/charset.py
new file mode 100644
index 0000000..d3d759a
--- /dev/null
+++ b/dist/lib/email/charset.py
@@ -0,0 +1,404 @@
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Ben Gertzfield, Barry Warsaw
+# Contact: email-sig@python.org
+
+__all__ = [
+ 'Charset',
+ 'add_alias',
+ 'add_charset',
+ 'add_codec',
+ ]
+
+from functools import partial
+
+import email.base64mime
+import email.quoprimime
+
+from email import errors
+from email.encoders import encode_7or8bit
+
+
+
+# Flags for types of header encodings
+QP = 1 # Quoted-Printable
+BASE64 = 2 # Base64
+SHORTEST = 3 # the shorter of QP and base64, but only for headers
+
+# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7
+RFC2047_CHROME_LEN = 7
+
+DEFAULT_CHARSET = 'us-ascii'
+UNKNOWN8BIT = 'unknown-8bit'
+EMPTYSTRING = ''
+
+
+
+# Defaults
+CHARSETS = {
+ # input header enc body enc output conv
+ 'iso-8859-1': (QP, QP, None),
+ 'iso-8859-2': (QP, QP, None),
+ 'iso-8859-3': (QP, QP, None),
+ 'iso-8859-4': (QP, QP, None),
+ # iso-8859-5 is Cyrillic, and not especially used
+ # iso-8859-6 is Arabic, also not particularly used
+ # iso-8859-7 is Greek, QP will not make it readable
+ # iso-8859-8 is Hebrew, QP will not make it readable
+ 'iso-8859-9': (QP, QP, None),
+ 'iso-8859-10': (QP, QP, None),
+ # iso-8859-11 is Thai, QP will not make it readable
+ 'iso-8859-13': (QP, QP, None),
+ 'iso-8859-14': (QP, QP, None),
+ 'iso-8859-15': (QP, QP, None),
+ 'iso-8859-16': (QP, QP, None),
+ 'windows-1252':(QP, QP, None),
+ 'viscii': (QP, QP, None),
+ 'us-ascii': (None, None, None),
+ 'big5': (BASE64, BASE64, None),
+ 'gb2312': (BASE64, BASE64, None),
+ 'euc-jp': (BASE64, None, 'iso-2022-jp'),
+ 'shift_jis': (BASE64, None, 'iso-2022-jp'),
+ 'iso-2022-jp': (BASE64, None, None),
+ 'koi8-r': (BASE64, BASE64, None),
+ 'utf-8': (SHORTEST, BASE64, 'utf-8'),
+ }
+
+# Aliases for other commonly-used names for character sets. Map
+# them to the real ones used in email.
+ALIASES = {
+ 'latin_1': 'iso-8859-1',
+ 'latin-1': 'iso-8859-1',
+ 'latin_2': 'iso-8859-2',
+ 'latin-2': 'iso-8859-2',
+ 'latin_3': 'iso-8859-3',
+ 'latin-3': 'iso-8859-3',
+ 'latin_4': 'iso-8859-4',
+ 'latin-4': 'iso-8859-4',
+ 'latin_5': 'iso-8859-9',
+ 'latin-5': 'iso-8859-9',
+ 'latin_6': 'iso-8859-10',
+ 'latin-6': 'iso-8859-10',
+ 'latin_7': 'iso-8859-13',
+ 'latin-7': 'iso-8859-13',
+ 'latin_8': 'iso-8859-14',
+ 'latin-8': 'iso-8859-14',
+ 'latin_9': 'iso-8859-15',
+ 'latin-9': 'iso-8859-15',
+ 'latin_10':'iso-8859-16',
+ 'latin-10':'iso-8859-16',
+ 'cp949': 'ks_c_5601-1987',
+ 'euc_jp': 'euc-jp',
+ 'euc_kr': 'euc-kr',
+ 'ascii': 'us-ascii',
+ }
+
+
+# Map charsets to their Unicode codec strings.
+CODEC_MAP = {
+ 'gb2312': 'eucgb2312_cn',
+ 'big5': 'big5_tw',
+ # Hack: We don't want *any* conversion for stuff marked us-ascii, as all
+ # sorts of garbage might be sent to us in the guise of 7-bit us-ascii.
+ # Let that stuff pass through without conversion to/from Unicode.
+ 'us-ascii': None,
+ }
+
+
+
+# Convenience functions for extending the above mappings
+def add_charset(charset, header_enc=None, body_enc=None, output_charset=None):
+ """Add character set properties to the global registry.
+
+ charset is the input character set, and must be the canonical name of a
+ character set.
+
+ Optional header_enc and body_enc is either Charset.QP for
+ quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for
+ the shortest of qp or base64 encoding, or None for no encoding. SHORTEST
+ is only valid for header_enc. It describes how message headers and
+ message bodies in the input charset are to be encoded. Default is no
+ encoding.
+
+ Optional output_charset is the character set that the output should be
+ in. Conversions will proceed from input charset, to Unicode, to the
+ output charset when the method Charset.convert() is called. The default
+ is to output in the same character set as the input.
+
+ Both input_charset and output_charset must have Unicode codec entries in
+ the module's charset-to-codec mapping; use add_codec(charset, codecname)
+ to add codecs the module does not know about. See the codecs module's
+ documentation for more information.
+ """
+ if body_enc == SHORTEST:
+ raise ValueError('SHORTEST not allowed for body_enc')
+ CHARSETS[charset] = (header_enc, body_enc, output_charset)
+
+
+def add_alias(alias, canonical):
+ """Add a character set alias.
+
+ alias is the alias name, e.g. latin-1
+ canonical is the character set's canonical name, e.g. iso-8859-1
+ """
+ ALIASES[alias] = canonical
+
+
+def add_codec(charset, codecname):
+ """Add a codec that map characters in the given charset to/from Unicode.
+
+ charset is the canonical name of a character set. codecname is the name
+ of a Python codec, as appropriate for the second argument to the unicode()
+ built-in, or to the encode() method of a Unicode string.
+ """
+ CODEC_MAP[charset] = codecname
+
+
+
+# Convenience function for encoding strings, taking into account
+# that they might be unknown-8bit (ie: have surrogate-escaped bytes)
+def _encode(string, codec):
+ if codec == UNKNOWN8BIT:
+ return string.encode('ascii', 'surrogateescape')
+ else:
+ return string.encode(codec)
+
+
+
+class Charset:
+ """Map character sets to their email properties.
+
+ This class provides information about the requirements imposed on email
+ for a specific character set. It also provides convenience routines for
+ converting between character sets, given the availability of the
+ applicable codecs. Given a character set, it will do its best to provide
+ information on how to use that character set in an email in an
+ RFC-compliant way.
+
+ Certain character sets must be encoded with quoted-printable or base64
+ when used in email headers or bodies. Certain character sets must be
+ converted outright, and are not allowed in email. Instances of this
+ module expose the following information about a character set:
+
+ input_charset: The initial character set specified. Common aliases
+ are converted to their `official' email names (e.g. latin_1
+ is converted to iso-8859-1). Defaults to 7-bit us-ascii.
+
+ header_encoding: If the character set must be encoded before it can be
+ used in an email header, this attribute will be set to
+ Charset.QP (for quoted-printable), Charset.BASE64 (for
+ base64 encoding), or Charset.SHORTEST for the shortest of
+ QP or BASE64 encoding. Otherwise, it will be None.
+
+ body_encoding: Same as header_encoding, but describes the encoding for the
+ mail message's body, which indeed may be different than the
+ header encoding. Charset.SHORTEST is not allowed for
+ body_encoding.
+
+ output_charset: Some character sets must be converted before they can be
+ used in email headers or bodies. If the input_charset is
+ one of them, this attribute will contain the name of the
+ charset output will be converted to. Otherwise, it will
+ be None.
+
+ input_codec: The name of the Python codec used to convert the
+ input_charset to Unicode. If no conversion codec is
+ necessary, this attribute will be None.
+
+ output_codec: The name of the Python codec used to convert Unicode
+ to the output_charset. If no conversion codec is necessary,
+ this attribute will have the same value as the input_codec.
+ """
+ def __init__(self, input_charset=DEFAULT_CHARSET):
+ # RFC 2046, $4.1.2 says charsets are not case sensitive. We coerce to
+ # unicode because its .lower() is locale insensitive. If the argument
+ # is already a unicode, we leave it at that, but ensure that the
+ # charset is ASCII, as the standard (RFC XXX) requires.
+ try:
+ if isinstance(input_charset, str):
+ input_charset.encode('ascii')
+ else:
+ input_charset = str(input_charset, 'ascii')
+ except UnicodeError:
+ raise errors.CharsetError(input_charset)
+ input_charset = input_charset.lower()
+ # Set the input charset after filtering through the aliases
+ self.input_charset = ALIASES.get(input_charset, input_charset)
+ # We can try to guess which encoding and conversion to use by the
+ # charset_map dictionary. Try that first, but let the user override
+ # it.
+ henc, benc, conv = CHARSETS.get(self.input_charset,
+ (SHORTEST, BASE64, None))
+ if not conv:
+ conv = self.input_charset
+ # Set the attributes, allowing the arguments to override the default.
+ self.header_encoding = henc
+ self.body_encoding = benc
+ self.output_charset = ALIASES.get(conv, conv)
+ # Now set the codecs. If one isn't defined for input_charset,
+ # guess and try a Unicode codec with the same name as input_codec.
+ self.input_codec = CODEC_MAP.get(self.input_charset,
+ self.input_charset)
+ self.output_codec = CODEC_MAP.get(self.output_charset,
+ self.output_charset)
+
+ def __repr__(self):
+ return self.input_charset.lower()
+
+ def __eq__(self, other):
+ return str(self) == str(other).lower()
+
+ def get_body_encoding(self):
+ """Return the content-transfer-encoding used for body encoding.
+
+ This is either the string `quoted-printable' or `base64' depending on
+ the encoding used, or it is a function in which case you should call
+ the function with a single argument, the Message object being
+ encoded. The function should then set the Content-Transfer-Encoding
+ header itself to whatever is appropriate.
+
+ Returns "quoted-printable" if self.body_encoding is QP.
+ Returns "base64" if self.body_encoding is BASE64.
+ Returns conversion function otherwise.
+ """
+ assert self.body_encoding != SHORTEST
+ if self.body_encoding == QP:
+ return 'quoted-printable'
+ elif self.body_encoding == BASE64:
+ return 'base64'
+ else:
+ return encode_7or8bit
+
+ def get_output_charset(self):
+ """Return the output character set.
+
+ This is self.output_charset if that is not None, otherwise it is
+ self.input_charset.
+ """
+ return self.output_charset or self.input_charset
+
+ def header_encode(self, string):
+ """Header-encode a string by converting it first to bytes.
+
+ The type of encoding (base64 or quoted-printable) will be based on
+ this charset's `header_encoding`.
+
+ :param string: A unicode string for the header. It must be possible
+ to encode this string to bytes using the character set's
+ output codec.
+ :return: The encoded string, with RFC 2047 chrome.
+ """
+ codec = self.output_codec or 'us-ascii'
+ header_bytes = _encode(string, codec)
+ # 7bit/8bit encodings return the string unchanged (modulo conversions)
+ encoder_module = self._get_encoder(header_bytes)
+ if encoder_module is None:
+ return string
+ return encoder_module.header_encode(header_bytes, codec)
+
+ def header_encode_lines(self, string, maxlengths):
+ """Header-encode a string by converting it first to bytes.
+
+ This is similar to `header_encode()` except that the string is fit
+ into maximum line lengths as given by the argument.
+
+ :param string: A unicode string for the header. It must be possible
+ to encode this string to bytes using the character set's
+ output codec.
+ :param maxlengths: Maximum line length iterator. Each element
+ returned from this iterator will provide the next maximum line
+ length. This parameter is used as an argument to built-in next()
+ and should never be exhausted. The maximum line lengths should
+ not count the RFC 2047 chrome. These line lengths are only a
+ hint; the splitter does the best it can.
+ :return: Lines of encoded strings, each with RFC 2047 chrome.
+ """
+ # See which encoding we should use.
+ codec = self.output_codec or 'us-ascii'
+ header_bytes = _encode(string, codec)
+ encoder_module = self._get_encoder(header_bytes)
+ encoder = partial(encoder_module.header_encode, charset=codec)
+ # Calculate the number of characters that the RFC 2047 chrome will
+ # contribute to each line.
+ charset = self.get_output_charset()
+ extra = len(charset) + RFC2047_CHROME_LEN
+ # Now comes the hard part. We must encode bytes but we can't split on
+ # bytes because some character sets are variable length and each
+ # encoded word must stand on its own. So the problem is you have to
+ # encode to bytes to figure out this word's length, but you must split
+ # on characters. This causes two problems: first, we don't know how
+ # many octets a specific substring of unicode characters will get
+ # encoded to, and second, we don't know how many ASCII characters
+ # those octets will get encoded to. Unless we try it. Which seems
+ # inefficient. In the interest of being correct rather than fast (and
+ # in the hope that there will be few encoded headers in any such
+ # message), brute force it. :(
+ lines = []
+ current_line = []
+ maxlen = next(maxlengths) - extra
+ for character in string:
+ current_line.append(character)
+ this_line = EMPTYSTRING.join(current_line)
+ length = encoder_module.header_length(_encode(this_line, charset))
+ if length > maxlen:
+ # This last character doesn't fit so pop it off.
+ current_line.pop()
+ # Does nothing fit on the first line?
+ if not lines and not current_line:
+ lines.append(None)
+ else:
+ separator = (' ' if lines else '')
+ joined_line = EMPTYSTRING.join(current_line)
+ header_bytes = _encode(joined_line, codec)
+ lines.append(encoder(header_bytes))
+ current_line = [character]
+ maxlen = next(maxlengths) - extra
+ joined_line = EMPTYSTRING.join(current_line)
+ header_bytes = _encode(joined_line, codec)
+ lines.append(encoder(header_bytes))
+ return lines
+
+ def _get_encoder(self, header_bytes):
+ if self.header_encoding == BASE64:
+ return email.base64mime
+ elif self.header_encoding == QP:
+ return email.quoprimime
+ elif self.header_encoding == SHORTEST:
+ len64 = email.base64mime.header_length(header_bytes)
+ lenqp = email.quoprimime.header_length(header_bytes)
+ if len64 < lenqp:
+ return email.base64mime
+ else:
+ return email.quoprimime
+ else:
+ return None
+
+ def body_encode(self, string):
+ """Body-encode a string by converting it first to bytes.
+
+ The type of encoding (base64 or quoted-printable) will be based on
+ self.body_encoding. If body_encoding is None, we assume the
+ output charset is a 7bit encoding, so re-encoding the decoded
+ string using the ascii codec produces the correct string version
+ of the content.
+ """
+ if not string:
+ return string
+ if self.body_encoding is BASE64:
+ if isinstance(string, str):
+ string = string.encode(self.output_charset)
+ return email.base64mime.body_encode(string)
+ elif self.body_encoding is QP:
+ # quopromime.body_encode takes a string, but operates on it as if
+ # it were a list of byte codes. For a (minimal) history on why
+ # this is so, see changeset 0cf700464177. To correctly encode a
+ # character set, then, we must turn it into pseudo bytes via the
+ # latin1 charset, which will encode any byte as a single code point
+ # between 0 and 255, which is what body_encode is expecting.
+ if isinstance(string, str):
+ string = string.encode(self.output_charset)
+ string = string.decode('latin1')
+ return email.quoprimime.body_encode(string)
+ else:
+ if isinstance(string, str):
+ string = string.encode(self.output_charset).decode('ascii')
+ return string
diff --git a/dist/lib/email/contentmanager.py b/dist/lib/email/contentmanager.py
new file mode 100644
index 0000000..b91fb0e
--- /dev/null
+++ b/dist/lib/email/contentmanager.py
@@ -0,0 +1,250 @@
+import binascii
+import email.charset
+import email.message
+import email.errors
+from email import quoprimime
+
+class ContentManager:
+
+ def __init__(self):
+ self.get_handlers = {}
+ self.set_handlers = {}
+
+ def add_get_handler(self, key, handler):
+ self.get_handlers[key] = handler
+
+ def get_content(self, msg, *args, **kw):
+ content_type = msg.get_content_type()
+ if content_type in self.get_handlers:
+ return self.get_handlers[content_type](msg, *args, **kw)
+ maintype = msg.get_content_maintype()
+ if maintype in self.get_handlers:
+ return self.get_handlers[maintype](msg, *args, **kw)
+ if '' in self.get_handlers:
+ return self.get_handlers[''](msg, *args, **kw)
+ raise KeyError(content_type)
+
+ def add_set_handler(self, typekey, handler):
+ self.set_handlers[typekey] = handler
+
+ def set_content(self, msg, obj, *args, **kw):
+ if msg.get_content_maintype() == 'multipart':
+ # XXX: is this error a good idea or not? We can remove it later,
+ # but we can't add it later, so do it for now.
+ raise TypeError("set_content not valid on multipart")
+ handler = self._find_set_handler(msg, obj)
+ msg.clear_content()
+ handler(msg, obj, *args, **kw)
+
+ def _find_set_handler(self, msg, obj):
+ full_path_for_error = None
+ for typ in type(obj).__mro__:
+ if typ in self.set_handlers:
+ return self.set_handlers[typ]
+ qname = typ.__qualname__
+ modname = getattr(typ, '__module__', '')
+ full_path = '.'.join((modname, qname)) if modname else qname
+ if full_path_for_error is None:
+ full_path_for_error = full_path
+ if full_path in self.set_handlers:
+ return self.set_handlers[full_path]
+ if qname in self.set_handlers:
+ return self.set_handlers[qname]
+ name = typ.__name__
+ if name in self.set_handlers:
+ return self.set_handlers[name]
+ if None in self.set_handlers:
+ return self.set_handlers[None]
+ raise KeyError(full_path_for_error)
+
+
+raw_data_manager = ContentManager()
+
+
+def get_text_content(msg, errors='replace'):
+ content = msg.get_payload(decode=True)
+ charset = msg.get_param('charset', 'ASCII')
+ return content.decode(charset, errors=errors)
+raw_data_manager.add_get_handler('text', get_text_content)
+
+
+def get_non_text_content(msg):
+ return msg.get_payload(decode=True)
+for maintype in 'audio image video application'.split():
+ raw_data_manager.add_get_handler(maintype, get_non_text_content)
+
+
+def get_message_content(msg):
+ return msg.get_payload(0)
+for subtype in 'rfc822 external-body'.split():
+ raw_data_manager.add_get_handler('message/'+subtype, get_message_content)
+
+
+def get_and_fixup_unknown_message_content(msg):
+ # If we don't understand a message subtype, we are supposed to treat it as
+ # if it were application/octet-stream, per
+ # tools.ietf.org/html/rfc2046#section-5.2.4. Feedparser doesn't do that,
+ # so do our best to fix things up. Note that it is *not* appropriate to
+ # model message/partial content as Message objects, so they are handled
+ # here as well. (How to reassemble them is out of scope for this comment :)
+ return bytes(msg.get_payload(0))
+raw_data_manager.add_get_handler('message',
+ get_and_fixup_unknown_message_content)
+
+
+def _prepare_set(msg, maintype, subtype, headers):
+ msg['Content-Type'] = '/'.join((maintype, subtype))
+ if headers:
+ if not hasattr(headers[0], 'name'):
+ mp = msg.policy
+ headers = [mp.header_factory(*mp.header_source_parse([header]))
+ for header in headers]
+ try:
+ for header in headers:
+ if header.defects:
+ raise header.defects[0]
+ msg[header.name] = header
+ except email.errors.HeaderDefect as exc:
+ raise ValueError("Invalid header: {}".format(
+ header.fold(policy=msg.policy))) from exc
+
+
+def _finalize_set(msg, disposition, filename, cid, params):
+ if disposition is None and filename is not None:
+ disposition = 'attachment'
+ if disposition is not None:
+ msg['Content-Disposition'] = disposition
+ if filename is not None:
+ msg.set_param('filename',
+ filename,
+ header='Content-Disposition',
+ replace=True)
+ if cid is not None:
+ msg['Content-ID'] = cid
+ if params is not None:
+ for key, value in params.items():
+ msg.set_param(key, value)
+
+
+# XXX: This is a cleaned-up version of base64mime.body_encode (including a bug
+# fix in the calculation of unencoded_bytes_per_line). It would be nice to
+# drop both this and quoprimime.body_encode in favor of enhanced binascii
+# routines that accepted a max_line_length parameter.
+def _encode_base64(data, max_line_length):
+ encoded_lines = []
+ unencoded_bytes_per_line = max_line_length // 4 * 3
+ for i in range(0, len(data), unencoded_bytes_per_line):
+ thisline = data[i:i+unencoded_bytes_per_line]
+ encoded_lines.append(binascii.b2a_base64(thisline).decode('ascii'))
+ return ''.join(encoded_lines)
+
+
+def _encode_text(string, charset, cte, policy):
+ lines = string.encode(charset).splitlines()
+ linesep = policy.linesep.encode('ascii')
+ def embedded_body(lines): return linesep.join(lines) + linesep
+ def normal_body(lines): return b'\n'.join(lines) + b'\n'
+ if cte==None:
+ # Use heuristics to decide on the "best" encoding.
+ if max((len(x) for x in lines), default=0) <= policy.max_line_length:
+ try:
+ return '7bit', normal_body(lines).decode('ascii')
+ except UnicodeDecodeError:
+ pass
+ if policy.cte_type == '8bit':
+ return '8bit', normal_body(lines).decode('ascii', 'surrogateescape')
+ sniff = embedded_body(lines[:10])
+ sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'),
+ policy.max_line_length)
+ sniff_base64 = binascii.b2a_base64(sniff)
+ # This is a little unfair to qp; it includes lineseps, base64 doesn't.
+ if len(sniff_qp) > len(sniff_base64):
+ cte = 'base64'
+ else:
+ cte = 'quoted-printable'
+ if len(lines) <= 10:
+ return cte, sniff_qp
+ if cte == '7bit':
+ data = normal_body(lines).decode('ascii')
+ elif cte == '8bit':
+ data = normal_body(lines).decode('ascii', 'surrogateescape')
+ elif cte == 'quoted-printable':
+ data = quoprimime.body_encode(normal_body(lines).decode('latin-1'),
+ policy.max_line_length)
+ elif cte == 'base64':
+ data = _encode_base64(embedded_body(lines), policy.max_line_length)
+ else:
+ raise ValueError("Unknown content transfer encoding {}".format(cte))
+ return cte, data
+
+
+def set_text_content(msg, string, subtype="plain", charset='utf-8', cte=None,
+ disposition=None, filename=None, cid=None,
+ params=None, headers=None):
+ _prepare_set(msg, 'text', subtype, headers)
+ cte, payload = _encode_text(string, charset, cte, msg.policy)
+ msg.set_payload(payload)
+ msg.set_param('charset',
+ email.charset.ALIASES.get(charset, charset),
+ replace=True)
+ msg['Content-Transfer-Encoding'] = cte
+ _finalize_set(msg, disposition, filename, cid, params)
+raw_data_manager.add_set_handler(str, set_text_content)
+
+
+def set_message_content(msg, message, subtype="rfc822", cte=None,
+ disposition=None, filename=None, cid=None,
+ params=None, headers=None):
+ if subtype == 'partial':
+ raise ValueError("message/partial is not supported for Message objects")
+ if subtype == 'rfc822':
+ if cte not in (None, '7bit', '8bit', 'binary'):
+ # http://tools.ietf.org/html/rfc2046#section-5.2.1 mandate.
+ raise ValueError(
+ "message/rfc822 parts do not support cte={}".format(cte))
+ # 8bit will get coerced on serialization if policy.cte_type='7bit'. We
+ # may end up claiming 8bit when it isn't needed, but the only negative
+ # result of that should be a gateway that needs to coerce to 7bit
+ # having to look through the whole embedded message to discover whether
+ # or not it actually has to do anything.
+ cte = '8bit' if cte is None else cte
+ elif subtype == 'external-body':
+ if cte not in (None, '7bit'):
+ # http://tools.ietf.org/html/rfc2046#section-5.2.3 mandate.
+ raise ValueError(
+ "message/external-body parts do not support cte={}".format(cte))
+ cte = '7bit'
+ elif cte is None:
+ # http://tools.ietf.org/html/rfc2046#section-5.2.4 says all future
+ # subtypes should be restricted to 7bit, so assume that.
+ cte = '7bit'
+ _prepare_set(msg, 'message', subtype, headers)
+ msg.set_payload([message])
+ msg['Content-Transfer-Encoding'] = cte
+ _finalize_set(msg, disposition, filename, cid, params)
+raw_data_manager.add_set_handler(email.message.Message, set_message_content)
+
+
+def set_bytes_content(msg, data, maintype, subtype, cte='base64',
+ disposition=None, filename=None, cid=None,
+ params=None, headers=None):
+ _prepare_set(msg, maintype, subtype, headers)
+ if cte == 'base64':
+ data = _encode_base64(data, max_line_length=msg.policy.max_line_length)
+ elif cte == 'quoted-printable':
+ # XXX: quoprimime.body_encode won't encode newline characters in data,
+ # so we can't use it. This means max_line_length is ignored. Another
+ # bug to fix later. (Note: encoders.quopri is broken on line ends.)
+ data = binascii.b2a_qp(data, istext=False, header=False, quotetabs=True)
+ data = data.decode('ascii')
+ elif cte == '7bit':
+ # Make sure it really is only ASCII. The early warning here seems
+ # worth the overhead...if you care write your own content manager :).
+ data.encode('ascii')
+ elif cte in ('8bit', 'binary'):
+ data = data.decode('ascii', 'surrogateescape')
+ msg.set_payload(data)
+ msg['Content-Transfer-Encoding'] = cte
+ _finalize_set(msg, disposition, filename, cid, params)
+for typ in (bytes, bytearray, memoryview):
+ raw_data_manager.add_set_handler(typ, set_bytes_content)
diff --git a/dist/lib/email/encoders.py b/dist/lib/email/encoders.py
new file mode 100644
index 0000000..0a66acb
--- /dev/null
+++ b/dist/lib/email/encoders.py
@@ -0,0 +1,69 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Encodings and related functions."""
+
+__all__ = [
+ 'encode_7or8bit',
+ 'encode_base64',
+ 'encode_noop',
+ 'encode_quopri',
+ ]
+
+
+from base64 import encodebytes as _bencode
+from quopri import encodestring as _encodestring
+
+
+
+def _qencode(s):
+ enc = _encodestring(s, quotetabs=True)
+ # Must encode spaces, which quopri.encodestring() doesn't do
+ return enc.replace(b' ', b'=20')
+
+
+def encode_base64(msg):
+ """Encode the message's payload in Base64.
+
+ Also, add an appropriate Content-Transfer-Encoding header.
+ """
+ orig = msg.get_payload(decode=True)
+ encdata = str(_bencode(orig), 'ascii')
+ msg.set_payload(encdata)
+ msg['Content-Transfer-Encoding'] = 'base64'
+
+
+
+def encode_quopri(msg):
+ """Encode the message's payload in quoted-printable.
+
+ Also, add an appropriate Content-Transfer-Encoding header.
+ """
+ orig = msg.get_payload(decode=True)
+ encdata = _qencode(orig)
+ msg.set_payload(encdata)
+ msg['Content-Transfer-Encoding'] = 'quoted-printable'
+
+
+
+def encode_7or8bit(msg):
+ """Set the Content-Transfer-Encoding header to 7bit or 8bit."""
+ orig = msg.get_payload(decode=True)
+ if orig is None:
+ # There's no payload. For backwards compatibility we use 7bit
+ msg['Content-Transfer-Encoding'] = '7bit'
+ return
+ # We play a trick to make this go fast. If decoding from ASCII succeeds,
+ # we know the data must be 7bit, otherwise treat it as 8bit.
+ try:
+ orig.decode('ascii')
+ except UnicodeError:
+ msg['Content-Transfer-Encoding'] = '8bit'
+ else:
+ msg['Content-Transfer-Encoding'] = '7bit'
+
+
+
+def encode_noop(msg):
+ """Do nothing."""
diff --git a/dist/lib/email/errors.py b/dist/lib/email/errors.py
new file mode 100644
index 0000000..d28a680
--- /dev/null
+++ b/dist/lib/email/errors.py
@@ -0,0 +1,110 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""email package exception classes."""
+
+
+class MessageError(Exception):
+ """Base class for errors in the email package."""
+
+
+class MessageParseError(MessageError):
+ """Base class for message parsing errors."""
+
+
+class HeaderParseError(MessageParseError):
+ """Error while parsing headers."""
+
+
+class BoundaryError(MessageParseError):
+ """Couldn't find terminating boundary."""
+
+
+class MultipartConversionError(MessageError, TypeError):
+ """Conversion to a multipart is prohibited."""
+
+
+class CharsetError(MessageError):
+ """An illegal charset was given."""
+
+
+# These are parsing defects which the parser was able to work around.
+class MessageDefect(ValueError):
+ """Base class for a message defect."""
+
+ def __init__(self, line=None):
+ if line is not None:
+ super().__init__(line)
+ self.line = line
+
+class NoBoundaryInMultipartDefect(MessageDefect):
+ """A message claimed to be a multipart but had no boundary parameter."""
+
+class StartBoundaryNotFoundDefect(MessageDefect):
+ """The claimed start boundary was never found."""
+
+class CloseBoundaryNotFoundDefect(MessageDefect):
+ """A start boundary was found, but not the corresponding close boundary."""
+
+class FirstHeaderLineIsContinuationDefect(MessageDefect):
+ """A message had a continuation line as its first header line."""
+
+class MisplacedEnvelopeHeaderDefect(MessageDefect):
+ """A 'Unix-from' header was found in the middle of a header block."""
+
+class MissingHeaderBodySeparatorDefect(MessageDefect):
+ """Found line with no leading whitespace and no colon before blank line."""
+# XXX: backward compatibility, just in case (it was never emitted).
+MalformedHeaderDefect = MissingHeaderBodySeparatorDefect
+
+class MultipartInvariantViolationDefect(MessageDefect):
+ """A message claimed to be a multipart but no subparts were found."""
+
+class InvalidMultipartContentTransferEncodingDefect(MessageDefect):
+ """An invalid content transfer encoding was set on the multipart itself."""
+
+class UndecodableBytesDefect(MessageDefect):
+ """Header contained bytes that could not be decoded"""
+
+class InvalidBase64PaddingDefect(MessageDefect):
+ """base64 encoded sequence had an incorrect length"""
+
+class InvalidBase64CharactersDefect(MessageDefect):
+ """base64 encoded sequence had characters not in base64 alphabet"""
+
+class InvalidBase64LengthDefect(MessageDefect):
+ """base64 encoded sequence had invalid length (1 mod 4)"""
+
+# These errors are specific to header parsing.
+
+class HeaderDefect(MessageDefect):
+ """Base class for a header defect."""
+
+ def __init__(self, *args, **kw):
+ super().__init__(*args, **kw)
+
+class InvalidHeaderDefect(HeaderDefect):
+ """Header is not valid, message gives details."""
+
+class HeaderMissingRequiredValue(HeaderDefect):
+ """A header that must have a value had none"""
+
+class NonPrintableDefect(HeaderDefect):
+ """ASCII characters outside the ascii-printable range found"""
+
+ def __init__(self, non_printables):
+ super().__init__(non_printables)
+ self.non_printables = non_printables
+
+ def __str__(self):
+ return ("the following ASCII non-printables found in header: "
+ "{}".format(self.non_printables))
+
+class ObsoleteHeaderDefect(HeaderDefect):
+ """Header uses syntax declared obsolete by RFC 5322"""
+
+class NonASCIILocalPartDefect(HeaderDefect):
+ """local_part contains non-ASCII characters"""
+ # This defect only occurs during unicode parsing, not when
+ # parsing messages decoded from binary.
diff --git a/dist/lib/email/feedparser.py b/dist/lib/email/feedparser.py
new file mode 100644
index 0000000..97d3f51
--- /dev/null
+++ b/dist/lib/email/feedparser.py
@@ -0,0 +1,536 @@
+# Copyright (C) 2004-2006 Python Software Foundation
+# Authors: Baxter, Wouters and Warsaw
+# Contact: email-sig@python.org
+
+"""FeedParser - An email feed parser.
+
+The feed parser implements an interface for incrementally parsing an email
+message, line by line. This has advantages for certain applications, such as
+those reading email messages off a socket.
+
+FeedParser.feed() is the primary interface for pushing new data into the
+parser. It returns when there's nothing more it can do with the available
+data. When you have no more data to push into the parser, call .close().
+This completes the parsing and returns the root message object.
+
+The other advantage of this parser is that it will never raise a parsing
+exception. Instead, when it finds something unexpected, it adds a 'defect' to
+the current message. Defects are just instances that live on the message
+object's .defects attribute.
+"""
+
+__all__ = ['FeedParser', 'BytesFeedParser']
+
+import re
+
+from email import errors
+from email._policybase import compat32
+from collections import deque
+from io import StringIO
+
+NLCRE = re.compile(r'\r\n|\r|\n')
+NLCRE_bol = re.compile(r'(\r\n|\r|\n)')
+NLCRE_eol = re.compile(r'(\r\n|\r|\n)\Z')
+NLCRE_crack = re.compile(r'(\r\n|\r|\n)')
+# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character
+# except controls, SP, and ":".
+headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])')
+EMPTYSTRING = ''
+NL = '\n'
+
+NeedMoreData = object()
+
+
+
+class BufferedSubFile(object):
+ """A file-ish object that can have new data loaded into it.
+
+ You can also push and pop line-matching predicates onto a stack. When the
+ current predicate matches the current line, a false EOF response
+ (i.e. empty string) is returned instead. This lets the parser adhere to a
+ simple abstraction -- it parses until EOF closes the current message.
+ """
+ def __init__(self):
+ # Text stream of the last partial line pushed into this object.
+ # See issue 22233 for why this is a text stream and not a list.
+ self._partial = StringIO(newline='')
+ # A deque of full, pushed lines
+ self._lines = deque()
+ # The stack of false-EOF checking predicates.
+ self._eofstack = []
+ # A flag indicating whether the file has been closed or not.
+ self._closed = False
+
+ def push_eof_matcher(self, pred):
+ self._eofstack.append(pred)
+
+ def pop_eof_matcher(self):
+ return self._eofstack.pop()
+
+ def close(self):
+ # Don't forget any trailing partial line.
+ self._partial.seek(0)
+ self.pushlines(self._partial.readlines())
+ self._partial.seek(0)
+ self._partial.truncate()
+ self._closed = True
+
+ def readline(self):
+ if not self._lines:
+ if self._closed:
+ return ''
+ return NeedMoreData
+ # Pop the line off the stack and see if it matches the current
+ # false-EOF predicate.
+ line = self._lines.popleft()
+ # RFC 2046, section 5.1.2 requires us to recognize outer level
+ # boundaries at any level of inner nesting. Do this, but be sure it's
+ # in the order of most to least nested.
+ for ateof in reversed(self._eofstack):
+ if ateof(line):
+ # We're at the false EOF. But push the last line back first.
+ self._lines.appendleft(line)
+ return ''
+ return line
+
+ def unreadline(self, line):
+ # Let the consumer push a line back into the buffer.
+ assert line is not NeedMoreData
+ self._lines.appendleft(line)
+
+ def push(self, data):
+ """Push some new data into this object."""
+ self._partial.write(data)
+ if '\n' not in data and '\r' not in data:
+ # No new complete lines, wait for more.
+ return
+
+ # Crack into lines, preserving the linesep characters.
+ self._partial.seek(0)
+ parts = self._partial.readlines()
+ self._partial.seek(0)
+ self._partial.truncate()
+
+ # If the last element of the list does not end in a newline, then treat
+ # it as a partial line. We only check for '\n' here because a line
+ # ending with '\r' might be a line that was split in the middle of a
+ # '\r\n' sequence (see bugs 1555570 and 1721862).
+ if not parts[-1].endswith('\n'):
+ self._partial.write(parts.pop())
+ self.pushlines(parts)
+
+ def pushlines(self, lines):
+ self._lines.extend(lines)
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ line = self.readline()
+ if line == '':
+ raise StopIteration
+ return line
+
+
+
+class FeedParser:
+ """A feed-style parser of email."""
+
+ def __init__(self, _factory=None, *, policy=compat32):
+ """_factory is called with no arguments to create a new message obj
+
+ The policy keyword specifies a policy object that controls a number of
+ aspects of the parser's operation. The default policy maintains
+ backward compatibility.
+
+ """
+ self.policy = policy
+ self._old_style_factory = False
+ if _factory is None:
+ if policy.message_factory is None:
+ from email.message import Message
+ self._factory = Message
+ else:
+ self._factory = policy.message_factory
+ else:
+ self._factory = _factory
+ try:
+ _factory(policy=self.policy)
+ except TypeError:
+ # Assume this is an old-style factory
+ self._old_style_factory = True
+ self._input = BufferedSubFile()
+ self._msgstack = []
+ self._parse = self._parsegen().__next__
+ self._cur = None
+ self._last = None
+ self._headersonly = False
+
+ # Non-public interface for supporting Parser's headersonly flag
+ def _set_headersonly(self):
+ self._headersonly = True
+
+ def feed(self, data):
+ """Push more data into the parser."""
+ self._input.push(data)
+ self._call_parse()
+
+ def _call_parse(self):
+ try:
+ self._parse()
+ except StopIteration:
+ pass
+
+ def close(self):
+ """Parse all remaining data and return the root message object."""
+ self._input.close()
+ self._call_parse()
+ root = self._pop_message()
+ assert not self._msgstack
+ # Look for final set of defects
+ if root.get_content_maintype() == 'multipart' \
+ and not root.is_multipart():
+ defect = errors.MultipartInvariantViolationDefect()
+ self.policy.handle_defect(root, defect)
+ return root
+
+ def _new_message(self):
+ if self._old_style_factory:
+ msg = self._factory()
+ else:
+ msg = self._factory(policy=self.policy)
+ if self._cur and self._cur.get_content_type() == 'multipart/digest':
+ msg.set_default_type('message/rfc822')
+ if self._msgstack:
+ self._msgstack[-1].attach(msg)
+ self._msgstack.append(msg)
+ self._cur = msg
+ self._last = msg
+
+ def _pop_message(self):
+ retval = self._msgstack.pop()
+ if self._msgstack:
+ self._cur = self._msgstack[-1]
+ else:
+ self._cur = None
+ return retval
+
+ def _parsegen(self):
+ # Create a new message and start by parsing headers.
+ self._new_message()
+ headers = []
+ # Collect the headers, searching for a line that doesn't match the RFC
+ # 2822 header or continuation pattern (including an empty line).
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ if not headerRE.match(line):
+ # If we saw the RFC defined header/body separator
+ # (i.e. newline), just throw it away. Otherwise the line is
+ # part of the body so push it back.
+ if not NLCRE.match(line):
+ defect = errors.MissingHeaderBodySeparatorDefect()
+ self.policy.handle_defect(self._cur, defect)
+ self._input.unreadline(line)
+ break
+ headers.append(line)
+ # Done with the headers, so parse them and figure out what we're
+ # supposed to see in the body of the message.
+ self._parse_headers(headers)
+ # Headers-only parsing is a backwards compatibility hack, which was
+ # necessary in the older parser, which could raise errors. All
+ # remaining lines in the input are thrown into the message body.
+ if self._headersonly:
+ lines = []
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ if line == '':
+ break
+ lines.append(line)
+ self._cur.set_payload(EMPTYSTRING.join(lines))
+ return
+ if self._cur.get_content_type() == 'message/delivery-status':
+ # message/delivery-status contains blocks of headers separated by
+ # a blank line. We'll represent each header block as a separate
+ # nested message object, but the processing is a bit different
+ # than standard message/* types because there is no body for the
+ # nested messages. A blank line separates the subparts.
+ while True:
+ self._input.push_eof_matcher(NLCRE.match)
+ for retval in self._parsegen():
+ if retval is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ msg = self._pop_message()
+ # We need to pop the EOF matcher in order to tell if we're at
+ # the end of the current file, not the end of the last block
+ # of message headers.
+ self._input.pop_eof_matcher()
+ # The input stream must be sitting at the newline or at the
+ # EOF. We want to see if we're at the end of this subpart, so
+ # first consume the blank line, then test the next line to see
+ # if we're at this subpart's EOF.
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ if line == '':
+ break
+ # Not at EOF so this is a line we're going to need.
+ self._input.unreadline(line)
+ return
+ if self._cur.get_content_maintype() == 'message':
+ # The message claims to be a message/* type, then what follows is
+ # another RFC 2822 message.
+ for retval in self._parsegen():
+ if retval is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ self._pop_message()
+ return
+ if self._cur.get_content_maintype() == 'multipart':
+ boundary = self._cur.get_boundary()
+ if boundary is None:
+ # The message /claims/ to be a multipart but it has not
+ # defined a boundary. That's a problem which we'll handle by
+ # reading everything until the EOF and marking the message as
+ # defective.
+ defect = errors.NoBoundaryInMultipartDefect()
+ self.policy.handle_defect(self._cur, defect)
+ lines = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ lines.append(line)
+ self._cur.set_payload(EMPTYSTRING.join(lines))
+ return
+ # Make sure a valid content type was specified per RFC 2045:6.4.
+ if (str(self._cur.get('content-transfer-encoding', '8bit')).lower()
+ not in ('7bit', '8bit', 'binary')):
+ defect = errors.InvalidMultipartContentTransferEncodingDefect()
+ self.policy.handle_defect(self._cur, defect)
+ # Create a line match predicate which matches the inter-part
+ # boundary as well as the end-of-multipart boundary. Don't push
+ # this onto the input stream until we've scanned past the
+ # preamble.
+ separator = '--' + boundary
+ boundaryre = re.compile(
+ '(?P' + re.escape(separator) +
+ r')(?P--)?(?P[ \t]*)(?P\r\n|\r|\n)?$')
+ capturing_preamble = True
+ preamble = []
+ linesep = False
+ close_boundary_seen = False
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ if line == '':
+ break
+ mo = boundaryre.match(line)
+ if mo:
+ # If we're looking at the end boundary, we're done with
+ # this multipart. If there was a newline at the end of
+ # the closing boundary, then we need to initialize the
+ # epilogue with the empty string (see below).
+ if mo.group('end'):
+ close_boundary_seen = True
+ linesep = mo.group('linesep')
+ break
+ # We saw an inter-part boundary. Were we in the preamble?
+ if capturing_preamble:
+ if preamble:
+ # According to RFC 2046, the last newline belongs
+ # to the boundary.
+ lastline = preamble[-1]
+ eolmo = NLCRE_eol.search(lastline)
+ if eolmo:
+ preamble[-1] = lastline[:-len(eolmo.group(0))]
+ self._cur.preamble = EMPTYSTRING.join(preamble)
+ capturing_preamble = False
+ self._input.unreadline(line)
+ continue
+ # We saw a boundary separating two parts. Consume any
+ # multiple boundary lines that may be following. Our
+ # interpretation of RFC 2046 BNF grammar does not produce
+ # body parts within such double boundaries.
+ while True:
+ line = self._input.readline()
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ mo = boundaryre.match(line)
+ if not mo:
+ self._input.unreadline(line)
+ break
+ # Recurse to parse this subpart; the input stream points
+ # at the subpart's first line.
+ self._input.push_eof_matcher(boundaryre.match)
+ for retval in self._parsegen():
+ if retval is NeedMoreData:
+ yield NeedMoreData
+ continue
+ break
+ # Because of RFC 2046, the newline preceding the boundary
+ # separator actually belongs to the boundary, not the
+ # previous subpart's payload (or epilogue if the previous
+ # part is a multipart).
+ if self._last.get_content_maintype() == 'multipart':
+ epilogue = self._last.epilogue
+ if epilogue == '':
+ self._last.epilogue = None
+ elif epilogue is not None:
+ mo = NLCRE_eol.search(epilogue)
+ if mo:
+ end = len(mo.group(0))
+ self._last.epilogue = epilogue[:-end]
+ else:
+ payload = self._last._payload
+ if isinstance(payload, str):
+ mo = NLCRE_eol.search(payload)
+ if mo:
+ payload = payload[:-len(mo.group(0))]
+ self._last._payload = payload
+ self._input.pop_eof_matcher()
+ self._pop_message()
+ # Set the multipart up for newline cleansing, which will
+ # happen if we're in a nested multipart.
+ self._last = self._cur
+ else:
+ # I think we must be in the preamble
+ assert capturing_preamble
+ preamble.append(line)
+ # We've seen either the EOF or the end boundary. If we're still
+ # capturing the preamble, we never saw the start boundary. Note
+ # that as a defect and store the captured text as the payload.
+ if capturing_preamble:
+ defect = errors.StartBoundaryNotFoundDefect()
+ self.policy.handle_defect(self._cur, defect)
+ self._cur.set_payload(EMPTYSTRING.join(preamble))
+ epilogue = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ self._cur.epilogue = EMPTYSTRING.join(epilogue)
+ return
+ # If we're not processing the preamble, then we might have seen
+ # EOF without seeing that end boundary...that is also a defect.
+ if not close_boundary_seen:
+ defect = errors.CloseBoundaryNotFoundDefect()
+ self.policy.handle_defect(self._cur, defect)
+ return
+ # Everything from here to the EOF is epilogue. If the end boundary
+ # ended in a newline, we'll need to make sure the epilogue isn't
+ # None
+ if linesep:
+ epilogue = ['']
+ else:
+ epilogue = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ epilogue.append(line)
+ # Any CRLF at the front of the epilogue is not technically part of
+ # the epilogue. Also, watch out for an empty string epilogue,
+ # which means a single newline.
+ if epilogue:
+ firstline = epilogue[0]
+ bolmo = NLCRE_bol.match(firstline)
+ if bolmo:
+ epilogue[0] = firstline[len(bolmo.group(0)):]
+ self._cur.epilogue = EMPTYSTRING.join(epilogue)
+ return
+ # Otherwise, it's some non-multipart type, so the entire rest of the
+ # file contents becomes the payload.
+ lines = []
+ for line in self._input:
+ if line is NeedMoreData:
+ yield NeedMoreData
+ continue
+ lines.append(line)
+ self._cur.set_payload(EMPTYSTRING.join(lines))
+
+ def _parse_headers(self, lines):
+ # Passed a list of lines that make up the headers for the current msg
+ lastheader = ''
+ lastvalue = []
+ for lineno, line in enumerate(lines):
+ # Check for continuation
+ if line[0] in ' \t':
+ if not lastheader:
+ # The first line of the headers was a continuation. This
+ # is illegal, so let's note the defect, store the illegal
+ # line, and ignore it for purposes of headers.
+ defect = errors.FirstHeaderLineIsContinuationDefect(line)
+ self.policy.handle_defect(self._cur, defect)
+ continue
+ lastvalue.append(line)
+ continue
+ if lastheader:
+ self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
+ lastheader, lastvalue = '', []
+ # Check for envelope header, i.e. unix-from
+ if line.startswith('From '):
+ if lineno == 0:
+ # Strip off the trailing newline
+ mo = NLCRE_eol.search(line)
+ if mo:
+ line = line[:-len(mo.group(0))]
+ self._cur.set_unixfrom(line)
+ continue
+ elif lineno == len(lines) - 1:
+ # Something looking like a unix-from at the end - it's
+ # probably the first line of the body, so push back the
+ # line and stop.
+ self._input.unreadline(line)
+ return
+ else:
+ # Weirdly placed unix-from line. Note this as a defect
+ # and ignore it.
+ defect = errors.MisplacedEnvelopeHeaderDefect(line)
+ self._cur.defects.append(defect)
+ continue
+ # Split the line on the colon separating field name from value.
+ # There will always be a colon, because if there wasn't the part of
+ # the parser that calls us would have started parsing the body.
+ i = line.find(':')
+
+ # If the colon is on the start of the line the header is clearly
+ # malformed, but we might be able to salvage the rest of the
+ # message. Track the error but keep going.
+ if i == 0:
+ defect = errors.InvalidHeaderDefect("Missing header name.")
+ self._cur.defects.append(defect)
+ continue
+
+ assert i>0, "_parse_headers fed line with no : and no leading WS"
+ lastheader = line[:i]
+ lastvalue = [line]
+ # Done with all the lines, so handle the last header.
+ if lastheader:
+ self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
+
+
+class BytesFeedParser(FeedParser):
+ """Like FeedParser, but feed accepts bytes."""
+
+ def feed(self, data):
+ super().feed(data.decode('ascii', 'surrogateescape'))
diff --git a/dist/lib/email/generator.py b/dist/lib/email/generator.py
new file mode 100644
index 0000000..ae670c2
--- /dev/null
+++ b/dist/lib/email/generator.py
@@ -0,0 +1,508 @@
+# Copyright (C) 2001-2010 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Classes to generate plain text from a message object tree."""
+
+__all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator']
+
+import re
+import sys
+import time
+import random
+
+from copy import deepcopy
+from io import StringIO, BytesIO
+from email.utils import _has_surrogates
+
+UNDERSCORE = '_'
+NL = '\n' # XXX: no longer used by the code below.
+
+NLCRE = re.compile(r'\r\n|\r|\n')
+fcre = re.compile(r'^From ', re.MULTILINE)
+
+
+
+class Generator:
+ """Generates output from a Message object tree.
+
+ This basic generator writes the message to the given file object as plain
+ text.
+ """
+ #
+ # Public interface
+ #
+
+ def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, *,
+ policy=None):
+ """Create the generator for message flattening.
+
+ outfp is the output file-like object for writing the message to. It
+ must have a write() method.
+
+ Optional mangle_from_ is a flag that, when True (the default if policy
+ is not set), escapes From_ lines in the body of the message by putting
+ a `>' in front of them.
+
+ Optional maxheaderlen specifies the longest length for a non-continued
+ header. When a header line is longer (in characters, with tabs
+ expanded to 8 spaces) than maxheaderlen, the header will split as
+ defined in the Header class. Set maxheaderlen to zero to disable
+ header wrapping. The default is 78, as recommended (but not required)
+ by RFC 2822.
+
+ The policy keyword specifies a policy object that controls a number of
+ aspects of the generator's operation. If no policy is specified,
+ the policy associated with the Message object passed to the
+ flatten method is used.
+
+ """
+
+ if mangle_from_ is None:
+ mangle_from_ = True if policy is None else policy.mangle_from_
+ self._fp = outfp
+ self._mangle_from_ = mangle_from_
+ self.maxheaderlen = maxheaderlen
+ self.policy = policy
+
+ def write(self, s):
+ # Just delegate to the file object
+ self._fp.write(s)
+
+ def flatten(self, msg, unixfrom=False, linesep=None):
+ r"""Print the message object tree rooted at msg to the output file
+ specified when the Generator instance was created.
+
+ unixfrom is a flag that forces the printing of a Unix From_ delimiter
+ before the first object in the message tree. If the original message
+ has no From_ delimiter, a `standard' one is crafted. By default, this
+ is False to inhibit the printing of any From_ delimiter.
+
+ Note that for subobjects, no From_ line is printed.
+
+ linesep specifies the characters used to indicate a new line in
+ the output. The default value is determined by the policy specified
+ when the Generator instance was created or, if none was specified,
+ from the policy associated with the msg.
+
+ """
+ # We use the _XXX constants for operating on data that comes directly
+ # from the msg, and _encoded_XXX constants for operating on data that
+ # has already been converted (to bytes in the BytesGenerator) and
+ # inserted into a temporary buffer.
+ policy = msg.policy if self.policy is None else self.policy
+ if linesep is not None:
+ policy = policy.clone(linesep=linesep)
+ if self.maxheaderlen is not None:
+ policy = policy.clone(max_line_length=self.maxheaderlen)
+ self._NL = policy.linesep
+ self._encoded_NL = self._encode(self._NL)
+ self._EMPTY = ''
+ self._encoded_EMPTY = self._encode(self._EMPTY)
+ # Because we use clone (below) when we recursively process message
+ # subparts, and because clone uses the computed policy (not None),
+ # submessages will automatically get set to the computed policy when
+ # they are processed by this code.
+ old_gen_policy = self.policy
+ old_msg_policy = msg.policy
+ try:
+ self.policy = policy
+ msg.policy = policy
+ if unixfrom:
+ ufrom = msg.get_unixfrom()
+ if not ufrom:
+ ufrom = 'From nobody ' + time.ctime(time.time())
+ self.write(ufrom + self._NL)
+ self._write(msg)
+ finally:
+ self.policy = old_gen_policy
+ msg.policy = old_msg_policy
+
+ def clone(self, fp):
+ """Clone this generator with the exact same options."""
+ return self.__class__(fp,
+ self._mangle_from_,
+ None, # Use policy setting, which we've adjusted
+ policy=self.policy)
+
+ #
+ # Protected interface - undocumented ;/
+ #
+
+ # Note that we use 'self.write' when what we are writing is coming from
+ # the source, and self._fp.write when what we are writing is coming from a
+ # buffer (because the Bytes subclass has already had a chance to transform
+ # the data in its write method in that case). This is an entirely
+ # pragmatic split determined by experiment; we could be more general by
+ # always using write and having the Bytes subclass write method detect when
+ # it has already transformed the input; but, since this whole thing is a
+ # hack anyway this seems good enough.
+
+ def _new_buffer(self):
+ # BytesGenerator overrides this to return BytesIO.
+ return StringIO()
+
+ def _encode(self, s):
+ # BytesGenerator overrides this to encode strings to bytes.
+ return s
+
+ def _write_lines(self, lines):
+ # We have to transform the line endings.
+ if not lines:
+ return
+ lines = NLCRE.split(lines)
+ for line in lines[:-1]:
+ self.write(line)
+ self.write(self._NL)
+ if lines[-1]:
+ self.write(lines[-1])
+ # XXX logic tells me this else should be needed, but the tests fail
+ # with it and pass without it. (NLCRE.split ends with a blank element
+ # if and only if there was a trailing newline.)
+ #else:
+ # self.write(self._NL)
+
+ def _write(self, msg):
+ # We can't write the headers yet because of the following scenario:
+ # say a multipart message includes the boundary string somewhere in
+ # its body. We'd have to calculate the new boundary /before/ we write
+ # the headers so that we can write the correct Content-Type:
+ # parameter.
+ #
+ # The way we do this, so as to make the _handle_*() methods simpler,
+ # is to cache any subpart writes into a buffer. The we write the
+ # headers and the buffer contents. That way, subpart handlers can
+ # Do The Right Thing, and can still modify the Content-Type: header if
+ # necessary.
+ oldfp = self._fp
+ try:
+ self._munge_cte = None
+ self._fp = sfp = self._new_buffer()
+ self._dispatch(msg)
+ finally:
+ self._fp = oldfp
+ munge_cte = self._munge_cte
+ del self._munge_cte
+ # If we munged the cte, copy the message again and re-fix the CTE.
+ if munge_cte:
+ msg = deepcopy(msg)
+ msg.replace_header('content-transfer-encoding', munge_cte[0])
+ msg.replace_header('content-type', munge_cte[1])
+ # Write the headers. First we see if the message object wants to
+ # handle that itself. If not, we'll do it generically.
+ meth = getattr(msg, '_write_headers', None)
+ if meth is None:
+ self._write_headers(msg)
+ else:
+ meth(self)
+ self._fp.write(sfp.getvalue())
+
+ def _dispatch(self, msg):
+ # Get the Content-Type: for the message, then try to dispatch to
+ # self._handle__(). If there's no handler for the
+ # full MIME type, then dispatch to self._handle_(). If
+ # that's missing too, then dispatch to self._writeBody().
+ main = msg.get_content_maintype()
+ sub = msg.get_content_subtype()
+ specific = UNDERSCORE.join((main, sub)).replace('-', '_')
+ meth = getattr(self, '_handle_' + specific, None)
+ if meth is None:
+ generic = main.replace('-', '_')
+ meth = getattr(self, '_handle_' + generic, None)
+ if meth is None:
+ meth = self._writeBody
+ meth(msg)
+
+ #
+ # Default handlers
+ #
+
+ def _write_headers(self, msg):
+ for h, v in msg.raw_items():
+ self.write(self.policy.fold(h, v))
+ # A blank line always separates headers from body
+ self.write(self._NL)
+
+ #
+ # Handlers for writing types and subtypes
+ #
+
+ def _handle_text(self, msg):
+ payload = msg.get_payload()
+ if payload is None:
+ return
+ if not isinstance(payload, str):
+ raise TypeError('string payload expected: %s' % type(payload))
+ if _has_surrogates(msg._payload):
+ charset = msg.get_param('charset')
+ if charset is not None:
+ # XXX: This copy stuff is an ugly hack to avoid modifying the
+ # existing message.
+ msg = deepcopy(msg)
+ del msg['content-transfer-encoding']
+ msg.set_payload(payload, charset)
+ payload = msg.get_payload()
+ self._munge_cte = (msg['content-transfer-encoding'],
+ msg['content-type'])
+ if self._mangle_from_:
+ payload = fcre.sub('>From ', payload)
+ self._write_lines(payload)
+
+ # Default body handler
+ _writeBody = _handle_text
+
+ def _handle_multipart(self, msg):
+ # The trick here is to write out each part separately, merge them all
+ # together, and then make sure that the boundary we've chosen isn't
+ # present in the payload.
+ msgtexts = []
+ subparts = msg.get_payload()
+ if subparts is None:
+ subparts = []
+ elif isinstance(subparts, str):
+ # e.g. a non-strict parse of a message with no starting boundary.
+ self.write(subparts)
+ return
+ elif not isinstance(subparts, list):
+ # Scalar payload
+ subparts = [subparts]
+ for part in subparts:
+ s = self._new_buffer()
+ g = self.clone(s)
+ g.flatten(part, unixfrom=False, linesep=self._NL)
+ msgtexts.append(s.getvalue())
+ # BAW: What about boundaries that are wrapped in double-quotes?
+ boundary = msg.get_boundary()
+ if not boundary:
+ # Create a boundary that doesn't appear in any of the
+ # message texts.
+ alltext = self._encoded_NL.join(msgtexts)
+ boundary = self._make_boundary(alltext)
+ msg.set_boundary(boundary)
+ # If there's a preamble, write it out, with a trailing CRLF
+ if msg.preamble is not None:
+ if self._mangle_from_:
+ preamble = fcre.sub('>From ', msg.preamble)
+ else:
+ preamble = msg.preamble
+ self._write_lines(preamble)
+ self.write(self._NL)
+ # dash-boundary transport-padding CRLF
+ self.write('--' + boundary + self._NL)
+ # body-part
+ if msgtexts:
+ self._fp.write(msgtexts.pop(0))
+ # *encapsulation
+ # --> delimiter transport-padding
+ # --> CRLF body-part
+ for body_part in msgtexts:
+ # delimiter transport-padding CRLF
+ self.write(self._NL + '--' + boundary + self._NL)
+ # body-part
+ self._fp.write(body_part)
+ # close-delimiter transport-padding
+ self.write(self._NL + '--' + boundary + '--' + self._NL)
+ if msg.epilogue is not None:
+ if self._mangle_from_:
+ epilogue = fcre.sub('>From ', msg.epilogue)
+ else:
+ epilogue = msg.epilogue
+ self._write_lines(epilogue)
+
+ def _handle_multipart_signed(self, msg):
+ # The contents of signed parts has to stay unmodified in order to keep
+ # the signature intact per RFC1847 2.1, so we disable header wrapping.
+ # RDM: This isn't enough to completely preserve the part, but it helps.
+ p = self.policy
+ self.policy = p.clone(max_line_length=0)
+ try:
+ self._handle_multipart(msg)
+ finally:
+ self.policy = p
+
+ def _handle_message_delivery_status(self, msg):
+ # We can't just write the headers directly to self's file object
+ # because this will leave an extra newline between the last header
+ # block and the boundary. Sigh.
+ blocks = []
+ for part in msg.get_payload():
+ s = self._new_buffer()
+ g = self.clone(s)
+ g.flatten(part, unixfrom=False, linesep=self._NL)
+ text = s.getvalue()
+ lines = text.split(self._encoded_NL)
+ # Strip off the unnecessary trailing empty line
+ if lines and lines[-1] == self._encoded_EMPTY:
+ blocks.append(self._encoded_NL.join(lines[:-1]))
+ else:
+ blocks.append(text)
+ # Now join all the blocks with an empty line. This has the lovely
+ # effect of separating each block with an empty line, but not adding
+ # an extra one after the last one.
+ self._fp.write(self._encoded_NL.join(blocks))
+
+ def _handle_message(self, msg):
+ s = self._new_buffer()
+ g = self.clone(s)
+ # The payload of a message/rfc822 part should be a multipart sequence
+ # of length 1. The zeroth element of the list should be the Message
+ # object for the subpart. Extract that object, stringify it, and
+ # write it out.
+ # Except, it turns out, when it's a string instead, which happens when
+ # and only when HeaderParser is used on a message of mime type
+ # message/rfc822. Such messages are generated by, for example,
+ # Groupwise when forwarding unadorned messages. (Issue 7970.) So
+ # in that case we just emit the string body.
+ payload = msg._payload
+ if isinstance(payload, list):
+ g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
+ payload = s.getvalue()
+ else:
+ payload = self._encode(payload)
+ self._fp.write(payload)
+
+ # This used to be a module level function; we use a classmethod for this
+ # and _compile_re so we can continue to provide the module level function
+ # for backward compatibility by doing
+ # _make_boundary = Generator._make_boundary
+ # at the end of the module. It *is* internal, so we could drop that...
+ @classmethod
+ def _make_boundary(cls, text=None):
+ # Craft a random boundary. If text is given, ensure that the chosen
+ # boundary doesn't appear in the text.
+ token = random.randrange(sys.maxsize)
+ boundary = ('=' * 15) + (_fmt % token) + '=='
+ if text is None:
+ return boundary
+ b = boundary
+ counter = 0
+ while True:
+ cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
+ if not cre.search(text):
+ break
+ b = boundary + '.' + str(counter)
+ counter += 1
+ return b
+
+ @classmethod
+ def _compile_re(cls, s, flags):
+ return re.compile(s, flags)
+
+
+class BytesGenerator(Generator):
+ """Generates a bytes version of a Message object tree.
+
+ Functionally identical to the base Generator except that the output is
+ bytes and not string. When surrogates were used in the input to encode
+ bytes, these are decoded back to bytes for output. If the policy has
+ cte_type set to 7bit, then the message is transformed such that the
+ non-ASCII bytes are properly content transfer encoded, using the charset
+ unknown-8bit.
+
+ The outfp object must accept bytes in its write method.
+ """
+
+ def write(self, s):
+ self._fp.write(s.encode('ascii', 'surrogateescape'))
+
+ def _new_buffer(self):
+ return BytesIO()
+
+ def _encode(self, s):
+ return s.encode('ascii')
+
+ def _write_headers(self, msg):
+ # This is almost the same as the string version, except for handling
+ # strings with 8bit bytes.
+ for h, v in msg.raw_items():
+ self._fp.write(self.policy.fold_binary(h, v))
+ # A blank line always separates headers from body
+ self.write(self._NL)
+
+ def _handle_text(self, msg):
+ # If the string has surrogates the original source was bytes, so
+ # just write it back out.
+ if msg._payload is None:
+ return
+ if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit':
+ if self._mangle_from_:
+ msg._payload = fcre.sub(">From ", msg._payload)
+ self._write_lines(msg._payload)
+ else:
+ super(BytesGenerator,self)._handle_text(msg)
+
+ # Default body handler
+ _writeBody = _handle_text
+
+ @classmethod
+ def _compile_re(cls, s, flags):
+ return re.compile(s.encode('ascii'), flags)
+
+
+
+_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'
+
+class DecodedGenerator(Generator):
+ """Generates a text representation of a message.
+
+ Like the Generator base class, except that non-text parts are substituted
+ with a format string representing the part.
+ """
+ def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, fmt=None, *,
+ policy=None):
+ """Like Generator.__init__() except that an additional optional
+ argument is allowed.
+
+ Walks through all subparts of a message. If the subpart is of main
+ type `text', then it prints the decoded payload of the subpart.
+
+ Otherwise, fmt is a format string that is used instead of the message
+ payload. fmt is expanded with the following keywords (in
+ %(keyword)s format):
+
+ type : Full MIME type of the non-text part
+ maintype : Main MIME type of the non-text part
+ subtype : Sub-MIME type of the non-text part
+ filename : Filename of the non-text part
+ description: Description associated with the non-text part
+ encoding : Content transfer encoding of the non-text part
+
+ The default value for fmt is None, meaning
+
+ [Non-text (%(type)s) part of message omitted, filename %(filename)s]
+ """
+ Generator.__init__(self, outfp, mangle_from_, maxheaderlen,
+ policy=policy)
+ if fmt is None:
+ self._fmt = _FMT
+ else:
+ self._fmt = fmt
+
+ def _dispatch(self, msg):
+ for part in msg.walk():
+ maintype = part.get_content_maintype()
+ if maintype == 'text':
+ print(part.get_payload(decode=False), file=self)
+ elif maintype == 'multipart':
+ # Just skip this
+ pass
+ else:
+ print(self._fmt % {
+ 'type' : part.get_content_type(),
+ 'maintype' : part.get_content_maintype(),
+ 'subtype' : part.get_content_subtype(),
+ 'filename' : part.get_filename('[no filename]'),
+ 'description': part.get('Content-Description',
+ '[no description]'),
+ 'encoding' : part.get('Content-Transfer-Encoding',
+ '[no encoding]'),
+ }, file=self)
+
+
+
+# Helper used by Generator._make_boundary
+_width = len(repr(sys.maxsize-1))
+_fmt = '%%0%dd' % _width
+
+# Backward compatibility
+_make_boundary = Generator._make_boundary
diff --git a/dist/lib/email/header.py b/dist/lib/email/header.py
new file mode 100644
index 0000000..4ab0032
--- /dev/null
+++ b/dist/lib/email/header.py
@@ -0,0 +1,578 @@
+# Copyright (C) 2002-2007 Python Software Foundation
+# Author: Ben Gertzfield, Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Header encoding and decoding functionality."""
+
+__all__ = [
+ 'Header',
+ 'decode_header',
+ 'make_header',
+ ]
+
+import re
+import binascii
+
+import email.quoprimime
+import email.base64mime
+
+from email.errors import HeaderParseError
+from email import charset as _charset
+Charset = _charset.Charset
+
+NL = '\n'
+SPACE = ' '
+BSPACE = b' '
+SPACE8 = ' ' * 8
+EMPTYSTRING = ''
+MAXLINELEN = 78
+FWS = ' \t'
+
+USASCII = Charset('us-ascii')
+UTF8 = Charset('utf-8')
+
+# Match encoded-word strings in the form =?charset?q?Hello_World?=
+ecre = re.compile(r'''
+ =\? # literal =?
+ (?P[^?]*?) # non-greedy up to the next ? is the charset
+ \? # literal ?
+ (?P[qQbB]) # either a "q" or a "b", case insensitive
+ \? # literal ?
+ (?P.*?) # non-greedy up to the next ?= is the encoded string
+ \?= # literal ?=
+ ''', re.VERBOSE | re.MULTILINE)
+
+# Field name regexp, including trailing colon, but not separating whitespace,
+# according to RFC 2822. Character range is from tilde to exclamation mark.
+# For use with .match()
+fcre = re.compile(r'[\041-\176]+:$')
+
+# Find a header embedded in a putative header value. Used to check for
+# header injection attack.
+_embedded_header = re.compile(r'\n[^ \t]+:')
+
+
+
+# Helpers
+_max_append = email.quoprimime._max_append
+
+
+
+def decode_header(header):
+ """Decode a message header value without converting charset.
+
+ Returns a list of (string, charset) pairs containing each of the decoded
+ parts of the header. Charset is None for non-encoded parts of the header,
+ otherwise a lower-case string containing the name of the character set
+ specified in the encoded string.
+
+ header may be a string that may or may not contain RFC2047 encoded words,
+ or it may be a Header object.
+
+ An email.errors.HeaderParseError may be raised when certain decoding error
+ occurs (e.g. a base64 decoding exception).
+ """
+ # If it is a Header object, we can just return the encoded chunks.
+ if hasattr(header, '_chunks'):
+ return [(_charset._encode(string, str(charset)), str(charset))
+ for string, charset in header._chunks]
+ # If no encoding, just return the header with no charset.
+ if not ecre.search(header):
+ return [(header, None)]
+ # First step is to parse all the encoded parts into triplets of the form
+ # (encoded_string, encoding, charset). For unencoded strings, the last
+ # two parts will be None.
+ words = []
+ for line in header.splitlines():
+ parts = ecre.split(line)
+ first = True
+ while parts:
+ unencoded = parts.pop(0)
+ if first:
+ unencoded = unencoded.lstrip()
+ first = False
+ if unencoded:
+ words.append((unencoded, None, None))
+ if parts:
+ charset = parts.pop(0).lower()
+ encoding = parts.pop(0).lower()
+ encoded = parts.pop(0)
+ words.append((encoded, encoding, charset))
+ # Now loop over words and remove words that consist of whitespace
+ # between two encoded strings.
+ droplist = []
+ for n, w in enumerate(words):
+ if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace():
+ droplist.append(n-1)
+ for d in reversed(droplist):
+ del words[d]
+
+ # The next step is to decode each encoded word by applying the reverse
+ # base64 or quopri transformation. decoded_words is now a list of the
+ # form (decoded_word, charset).
+ decoded_words = []
+ for encoded_string, encoding, charset in words:
+ if encoding is None:
+ # This is an unencoded word.
+ decoded_words.append((encoded_string, charset))
+ elif encoding == 'q':
+ word = email.quoprimime.header_decode(encoded_string)
+ decoded_words.append((word, charset))
+ elif encoding == 'b':
+ paderr = len(encoded_string) % 4 # Postel's law: add missing padding
+ if paderr:
+ encoded_string += '==='[:4 - paderr]
+ try:
+ word = email.base64mime.decode(encoded_string)
+ except binascii.Error:
+ raise HeaderParseError('Base64 decoding error')
+ else:
+ decoded_words.append((word, charset))
+ else:
+ raise AssertionError('Unexpected encoding: ' + encoding)
+ # Now convert all words to bytes and collapse consecutive runs of
+ # similarly encoded words.
+ collapsed = []
+ last_word = last_charset = None
+ for word, charset in decoded_words:
+ if isinstance(word, str):
+ word = bytes(word, 'raw-unicode-escape')
+ if last_word is None:
+ last_word = word
+ last_charset = charset
+ elif charset != last_charset:
+ collapsed.append((last_word, last_charset))
+ last_word = word
+ last_charset = charset
+ elif last_charset is None:
+ last_word += BSPACE + word
+ else:
+ last_word += word
+ collapsed.append((last_word, last_charset))
+ return collapsed
+
+
+
+def make_header(decoded_seq, maxlinelen=None, header_name=None,
+ continuation_ws=' '):
+ """Create a Header from a sequence of pairs as returned by decode_header()
+
+ decode_header() takes a header value string and returns a sequence of
+ pairs of the format (decoded_string, charset) where charset is the string
+ name of the character set.
+
+ This function takes one of those sequence of pairs and returns a Header
+ instance. Optional maxlinelen, header_name, and continuation_ws are as in
+ the Header constructor.
+ """
+ h = Header(maxlinelen=maxlinelen, header_name=header_name,
+ continuation_ws=continuation_ws)
+ for s, charset in decoded_seq:
+ # None means us-ascii but we can simply pass it on to h.append()
+ if charset is not None and not isinstance(charset, Charset):
+ charset = Charset(charset)
+ h.append(s, charset)
+ return h
+
+
+
+class Header:
+ def __init__(self, s=None, charset=None,
+ maxlinelen=None, header_name=None,
+ continuation_ws=' ', errors='strict'):
+ """Create a MIME-compliant header that can contain many character sets.
+
+ Optional s is the initial header value. If None, the initial header
+ value is not set. You can later append to the header with .append()
+ method calls. s may be a byte string or a Unicode string, but see the
+ .append() documentation for semantics.
+
+ Optional charset serves two purposes: it has the same meaning as the
+ charset argument to the .append() method. It also sets the default
+ character set for all subsequent .append() calls that omit the charset
+ argument. If charset is not provided in the constructor, the us-ascii
+ charset is used both as s's initial charset and as the default for
+ subsequent .append() calls.
+
+ The maximum line length can be specified explicitly via maxlinelen. For
+ splitting the first line to a shorter value (to account for the field
+ header which isn't included in s, e.g. `Subject') pass in the name of
+ the field in header_name. The default maxlinelen is 78 as recommended
+ by RFC 2822.
+
+ continuation_ws must be RFC 2822 compliant folding whitespace (usually
+ either a space or a hard tab) which will be prepended to continuation
+ lines.
+
+ errors is passed through to the .append() call.
+ """
+ if charset is None:
+ charset = USASCII
+ elif not isinstance(charset, Charset):
+ charset = Charset(charset)
+ self._charset = charset
+ self._continuation_ws = continuation_ws
+ self._chunks = []
+ if s is not None:
+ self.append(s, charset, errors)
+ if maxlinelen is None:
+ maxlinelen = MAXLINELEN
+ self._maxlinelen = maxlinelen
+ if header_name is None:
+ self._headerlen = 0
+ else:
+ # Take the separating colon and space into account.
+ self._headerlen = len(header_name) + 2
+
+ def __str__(self):
+ """Return the string value of the header."""
+ self._normalize()
+ uchunks = []
+ lastcs = None
+ lastspace = None
+ for string, charset in self._chunks:
+ # We must preserve spaces between encoded and non-encoded word
+ # boundaries, which means for us we need to add a space when we go
+ # from a charset to None/us-ascii, or from None/us-ascii to a
+ # charset. Only do this for the second and subsequent chunks.
+ # Don't add a space if the None/us-ascii string already has
+ # a space (trailing or leading depending on transition)
+ nextcs = charset
+ if nextcs == _charset.UNKNOWN8BIT:
+ original_bytes = string.encode('ascii', 'surrogateescape')
+ string = original_bytes.decode('ascii', 'replace')
+ if uchunks:
+ hasspace = string and self._nonctext(string[0])
+ if lastcs not in (None, 'us-ascii'):
+ if nextcs in (None, 'us-ascii') and not hasspace:
+ uchunks.append(SPACE)
+ nextcs = None
+ elif nextcs not in (None, 'us-ascii') and not lastspace:
+ uchunks.append(SPACE)
+ lastspace = string and self._nonctext(string[-1])
+ lastcs = nextcs
+ uchunks.append(string)
+ return EMPTYSTRING.join(uchunks)
+
+ # Rich comparison operators for equality only. BAW: does it make sense to
+ # have or explicitly disable <, <=, >, >= operators?
+ def __eq__(self, other):
+ # other may be a Header or a string. Both are fine so coerce
+ # ourselves to a unicode (of the unencoded header value), swap the
+ # args and do another comparison.
+ return other == str(self)
+
+ def append(self, s, charset=None, errors='strict'):
+ """Append a string to the MIME header.
+
+ Optional charset, if given, should be a Charset instance or the name
+ of a character set (which will be converted to a Charset instance). A
+ value of None (the default) means that the charset given in the
+ constructor is used.
+
+ s may be a byte string or a Unicode string. If it is a byte string
+ (i.e. isinstance(s, str) is false), then charset is the encoding of
+ that byte string, and a UnicodeError will be raised if the string
+ cannot be decoded with that charset. If s is a Unicode string, then
+ charset is a hint specifying the character set of the characters in
+ the string. In either case, when producing an RFC 2822 compliant
+ header using RFC 2047 rules, the string will be encoded using the
+ output codec of the charset. If the string cannot be encoded to the
+ output codec, a UnicodeError will be raised.
+
+ Optional `errors' is passed as the errors argument to the decode
+ call if s is a byte string.
+ """
+ if charset is None:
+ charset = self._charset
+ elif not isinstance(charset, Charset):
+ charset = Charset(charset)
+ if not isinstance(s, str):
+ input_charset = charset.input_codec or 'us-ascii'
+ if input_charset == _charset.UNKNOWN8BIT:
+ s = s.decode('us-ascii', 'surrogateescape')
+ else:
+ s = s.decode(input_charset, errors)
+ # Ensure that the bytes we're storing can be decoded to the output
+ # character set, otherwise an early error is raised.
+ output_charset = charset.output_codec or 'us-ascii'
+ if output_charset != _charset.UNKNOWN8BIT:
+ try:
+ s.encode(output_charset, errors)
+ except UnicodeEncodeError:
+ if output_charset!='us-ascii':
+ raise
+ charset = UTF8
+ self._chunks.append((s, charset))
+
+ def _nonctext(self, s):
+ """True if string s is not a ctext character of RFC822.
+ """
+ return s.isspace() or s in ('(', ')', '\\')
+
+ def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
+ r"""Encode a message header into an RFC-compliant format.
+
+ There are many issues involved in converting a given string for use in
+ an email header. Only certain character sets are readable in most
+ email clients, and as header strings can only contain a subset of
+ 7-bit ASCII, care must be taken to properly convert and encode (with
+ Base64 or quoted-printable) header strings. In addition, there is a
+ 75-character length limit on any given encoded header field, so
+ line-wrapping must be performed, even with double-byte character sets.
+
+ Optional maxlinelen specifies the maximum length of each generated
+ line, exclusive of the linesep string. Individual lines may be longer
+ than maxlinelen if a folding point cannot be found. The first line
+ will be shorter by the length of the header name plus ": " if a header
+ name was specified at Header construction time. The default value for
+ maxlinelen is determined at header construction time.
+
+ Optional splitchars is a string containing characters which should be
+ given extra weight by the splitting algorithm during normal header
+ wrapping. This is in very rough support of RFC 2822's `higher level
+ syntactic breaks': split points preceded by a splitchar are preferred
+ during line splitting, with the characters preferred in the order in
+ which they appear in the string. Space and tab may be included in the
+ string to indicate whether preference should be given to one over the
+ other as a split point when other split chars do not appear in the line
+ being split. Splitchars does not affect RFC 2047 encoded lines.
+
+ Optional linesep is a string to be used to separate the lines of
+ the value. The default value is the most useful for typical
+ Python applications, but it can be set to \r\n to produce RFC-compliant
+ line separators when needed.
+ """
+ self._normalize()
+ if maxlinelen is None:
+ maxlinelen = self._maxlinelen
+ # A maxlinelen of 0 means don't wrap. For all practical purposes,
+ # choosing a huge number here accomplishes that and makes the
+ # _ValueFormatter algorithm much simpler.
+ if maxlinelen == 0:
+ maxlinelen = 1000000
+ formatter = _ValueFormatter(self._headerlen, maxlinelen,
+ self._continuation_ws, splitchars)
+ lastcs = None
+ hasspace = lastspace = None
+ for string, charset in self._chunks:
+ if hasspace is not None:
+ hasspace = string and self._nonctext(string[0])
+ if lastcs not in (None, 'us-ascii'):
+ if not hasspace or charset not in (None, 'us-ascii'):
+ formatter.add_transition()
+ elif charset not in (None, 'us-ascii') and not lastspace:
+ formatter.add_transition()
+ lastspace = string and self._nonctext(string[-1])
+ lastcs = charset
+ hasspace = False
+ lines = string.splitlines()
+ if lines:
+ formatter.feed('', lines[0], charset)
+ else:
+ formatter.feed('', '', charset)
+ for line in lines[1:]:
+ formatter.newline()
+ if charset.header_encoding is not None:
+ formatter.feed(self._continuation_ws, ' ' + line.lstrip(),
+ charset)
+ else:
+ sline = line.lstrip()
+ fws = line[:len(line)-len(sline)]
+ formatter.feed(fws, sline, charset)
+ if len(lines) > 1:
+ formatter.newline()
+ if self._chunks:
+ formatter.add_transition()
+ value = formatter._str(linesep)
+ if _embedded_header.search(value):
+ raise HeaderParseError("header value appears to contain "
+ "an embedded header: {!r}".format(value))
+ return value
+
+ def _normalize(self):
+ # Step 1: Normalize the chunks so that all runs of identical charsets
+ # get collapsed into a single unicode string.
+ chunks = []
+ last_charset = None
+ last_chunk = []
+ for string, charset in self._chunks:
+ if charset == last_charset:
+ last_chunk.append(string)
+ else:
+ if last_charset is not None:
+ chunks.append((SPACE.join(last_chunk), last_charset))
+ last_chunk = [string]
+ last_charset = charset
+ if last_chunk:
+ chunks.append((SPACE.join(last_chunk), last_charset))
+ self._chunks = chunks
+
+
+
+class _ValueFormatter:
+ def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
+ self._maxlen = maxlen
+ self._continuation_ws = continuation_ws
+ self._continuation_ws_len = len(continuation_ws)
+ self._splitchars = splitchars
+ self._lines = []
+ self._current_line = _Accumulator(headerlen)
+
+ def _str(self, linesep):
+ self.newline()
+ return linesep.join(self._lines)
+
+ def __str__(self):
+ return self._str(NL)
+
+ def newline(self):
+ end_of_line = self._current_line.pop()
+ if end_of_line != (' ', ''):
+ self._current_line.push(*end_of_line)
+ if len(self._current_line) > 0:
+ if self._current_line.is_onlyws() and self._lines:
+ self._lines[-1] += str(self._current_line)
+ else:
+ self._lines.append(str(self._current_line))
+ self._current_line.reset()
+
+ def add_transition(self):
+ self._current_line.push(' ', '')
+
+ def feed(self, fws, string, charset):
+ # If the charset has no header encoding (i.e. it is an ASCII encoding)
+ # then we must split the header at the "highest level syntactic break"
+ # possible. Note that we don't have a lot of smarts about field
+ # syntax; we just try to break on semi-colons, then commas, then
+ # whitespace. Eventually, this should be pluggable.
+ if charset.header_encoding is None:
+ self._ascii_split(fws, string, self._splitchars)
+ return
+ # Otherwise, we're doing either a Base64 or a quoted-printable
+ # encoding which means we don't need to split the line on syntactic
+ # breaks. We can basically just find enough characters to fit on the
+ # current line, minus the RFC 2047 chrome. What makes this trickier
+ # though is that we have to split at octet boundaries, not character
+ # boundaries but it's only safe to split at character boundaries so at
+ # best we can only get close.
+ encoded_lines = charset.header_encode_lines(string, self._maxlengths())
+ # The first element extends the current line, but if it's None then
+ # nothing more fit on the current line so start a new line.
+ try:
+ first_line = encoded_lines.pop(0)
+ except IndexError:
+ # There are no encoded lines, so we're done.
+ return
+ if first_line is not None:
+ self._append_chunk(fws, first_line)
+ try:
+ last_line = encoded_lines.pop()
+ except IndexError:
+ # There was only one line.
+ return
+ self.newline()
+ self._current_line.push(self._continuation_ws, last_line)
+ # Everything else are full lines in themselves.
+ for line in encoded_lines:
+ self._lines.append(self._continuation_ws + line)
+
+ def _maxlengths(self):
+ # The first line's length.
+ yield self._maxlen - len(self._current_line)
+ while True:
+ yield self._maxlen - self._continuation_ws_len
+
+ def _ascii_split(self, fws, string, splitchars):
+ # The RFC 2822 header folding algorithm is simple in principle but
+ # complex in practice. Lines may be folded any place where "folding
+ # white space" appears by inserting a linesep character in front of the
+ # FWS. The complication is that not all spaces or tabs qualify as FWS,
+ # and we are also supposed to prefer to break at "higher level
+ # syntactic breaks". We can't do either of these without intimate
+ # knowledge of the structure of structured headers, which we don't have
+ # here. So the best we can do here is prefer to break at the specified
+ # splitchars, and hope that we don't choose any spaces or tabs that
+ # aren't legal FWS. (This is at least better than the old algorithm,
+ # where we would sometimes *introduce* FWS after a splitchar, or the
+ # algorithm before that, where we would turn all white space runs into
+ # single spaces or tabs.)
+ parts = re.split("(["+FWS+"]+)", fws+string)
+ if parts[0]:
+ parts[:0] = ['']
+ else:
+ parts.pop(0)
+ for fws, part in zip(*[iter(parts)]*2):
+ self._append_chunk(fws, part)
+
+ def _append_chunk(self, fws, string):
+ self._current_line.push(fws, string)
+ if len(self._current_line) > self._maxlen:
+ # Find the best split point, working backward from the end.
+ # There might be none, on a long first line.
+ for ch in self._splitchars:
+ for i in range(self._current_line.part_count()-1, 0, -1):
+ if ch.isspace():
+ fws = self._current_line[i][0]
+ if fws and fws[0]==ch:
+ break
+ prevpart = self._current_line[i-1][1]
+ if prevpart and prevpart[-1]==ch:
+ break
+ else:
+ continue
+ break
+ else:
+ fws, part = self._current_line.pop()
+ if self._current_line._initial_size > 0:
+ # There will be a header, so leave it on a line by itself.
+ self.newline()
+ if not fws:
+ # We don't use continuation_ws here because the whitespace
+ # after a header should always be a space.
+ fws = ' '
+ self._current_line.push(fws, part)
+ return
+ remainder = self._current_line.pop_from(i)
+ self._lines.append(str(self._current_line))
+ self._current_line.reset(remainder)
+
+
+class _Accumulator(list):
+
+ def __init__(self, initial_size=0):
+ self._initial_size = initial_size
+ super().__init__()
+
+ def push(self, fws, string):
+ self.append((fws, string))
+
+ def pop_from(self, i=0):
+ popped = self[i:]
+ self[i:] = []
+ return popped
+
+ def pop(self):
+ if self.part_count()==0:
+ return ('', '')
+ return super().pop()
+
+ def __len__(self):
+ return sum((len(fws)+len(part) for fws, part in self),
+ self._initial_size)
+
+ def __str__(self):
+ return EMPTYSTRING.join((EMPTYSTRING.join((fws, part))
+ for fws, part in self))
+
+ def reset(self, startval=None):
+ if startval is None:
+ startval = []
+ self[:] = startval
+ self._initial_size = 0
+
+ def is_onlyws(self):
+ return self._initial_size==0 and (not self or str(self).isspace())
+
+ def part_count(self):
+ return super().__len__()
diff --git a/dist/lib/email/headerregistry.py b/dist/lib/email/headerregistry.py
new file mode 100644
index 0000000..d0914fd
--- /dev/null
+++ b/dist/lib/email/headerregistry.py
@@ -0,0 +1,607 @@
+"""Representing and manipulating email headers via custom objects.
+
+This module provides an implementation of the HeaderRegistry API.
+The implementation is designed to flexibly follow RFC5322 rules.
+
+Eventually HeaderRegistry will be a public API, but it isn't yet,
+and will probably change some before that happens.
+
+"""
+from types import MappingProxyType
+
+from email import utils
+from email import errors
+from email import _header_value_parser as parser
+
+class Address:
+
+ def __init__(self, display_name='', username='', domain='', addr_spec=None):
+ """Create an object representing a full email address.
+
+ An address can have a 'display_name', a 'username', and a 'domain'. In
+ addition to specifying the username and domain separately, they may be
+ specified together by using the addr_spec keyword *instead of* the
+ username and domain keywords. If an addr_spec string is specified it
+ must be properly quoted according to RFC 5322 rules; an error will be
+ raised if it is not.
+
+ An Address object has display_name, username, domain, and addr_spec
+ attributes, all of which are read-only. The addr_spec and the string
+ value of the object are both quoted according to RFC5322 rules, but
+ without any Content Transfer Encoding.
+
+ """
+
+ inputs = ''.join(filter(None, (display_name, username, domain, addr_spec)))
+ if '\r' in inputs or '\n' in inputs:
+ raise ValueError("invalid arguments; address parts cannot contain CR or LF")
+
+ # This clause with its potential 'raise' may only happen when an
+ # application program creates an Address object using an addr_spec
+ # keyword. The email library code itself must always supply username
+ # and domain.
+ if addr_spec is not None:
+ if username or domain:
+ raise TypeError("addrspec specified when username and/or "
+ "domain also specified")
+ a_s, rest = parser.get_addr_spec(addr_spec)
+ if rest:
+ raise ValueError("Invalid addr_spec; only '{}' "
+ "could be parsed from '{}'".format(
+ a_s, addr_spec))
+ if a_s.all_defects:
+ raise a_s.all_defects[0]
+ username = a_s.local_part
+ domain = a_s.domain
+ self._display_name = display_name
+ self._username = username
+ self._domain = domain
+
+ @property
+ def display_name(self):
+ return self._display_name
+
+ @property
+ def username(self):
+ return self._username
+
+ @property
+ def domain(self):
+ return self._domain
+
+ @property
+ def addr_spec(self):
+ """The addr_spec (username@domain) portion of the address, quoted
+ according to RFC 5322 rules, but with no Content Transfer Encoding.
+ """
+ nameset = set(self.username)
+ if len(nameset) > len(nameset-parser.DOT_ATOM_ENDS):
+ lp = parser.quote_string(self.username)
+ else:
+ lp = self.username
+ if self.domain:
+ return lp + '@' + self.domain
+ if not lp:
+ return '<>'
+ return lp
+
+ def __repr__(self):
+ return "{}(display_name={!r}, username={!r}, domain={!r})".format(
+ self.__class__.__name__,
+ self.display_name, self.username, self.domain)
+
+ def __str__(self):
+ nameset = set(self.display_name)
+ if len(nameset) > len(nameset-parser.SPECIALS):
+ disp = parser.quote_string(self.display_name)
+ else:
+ disp = self.display_name
+ if disp:
+ addr_spec = '' if self.addr_spec=='<>' else self.addr_spec
+ return "{} <{}>".format(disp, addr_spec)
+ return self.addr_spec
+
+ def __eq__(self, other):
+ if type(other) != type(self):
+ return False
+ return (self.display_name == other.display_name and
+ self.username == other.username and
+ self.domain == other.domain)
+
+
+class Group:
+
+ def __init__(self, display_name=None, addresses=None):
+ """Create an object representing an address group.
+
+ An address group consists of a display_name followed by colon and a
+ list of addresses (see Address) terminated by a semi-colon. The Group
+ is created by specifying a display_name and a possibly empty list of
+ Address objects. A Group can also be used to represent a single
+ address that is not in a group, which is convenient when manipulating
+ lists that are a combination of Groups and individual Addresses. In
+ this case the display_name should be set to None. In particular, the
+ string representation of a Group whose display_name is None is the same
+ as the Address object, if there is one and only one Address object in
+ the addresses list.
+
+ """
+ self._display_name = display_name
+ self._addresses = tuple(addresses) if addresses else tuple()
+
+ @property
+ def display_name(self):
+ return self._display_name
+
+ @property
+ def addresses(self):
+ return self._addresses
+
+ def __repr__(self):
+ return "{}(display_name={!r}, addresses={!r}".format(
+ self.__class__.__name__,
+ self.display_name, self.addresses)
+
+ def __str__(self):
+ if self.display_name is None and len(self.addresses)==1:
+ return str(self.addresses[0])
+ disp = self.display_name
+ if disp is not None:
+ nameset = set(disp)
+ if len(nameset) > len(nameset-parser.SPECIALS):
+ disp = parser.quote_string(disp)
+ adrstr = ", ".join(str(x) for x in self.addresses)
+ adrstr = ' ' + adrstr if adrstr else adrstr
+ return "{}:{};".format(disp, adrstr)
+
+ def __eq__(self, other):
+ if type(other) != type(self):
+ return False
+ return (self.display_name == other.display_name and
+ self.addresses == other.addresses)
+
+
+# Header Classes #
+
+class BaseHeader(str):
+
+ """Base class for message headers.
+
+ Implements generic behavior and provides tools for subclasses.
+
+ A subclass must define a classmethod named 'parse' that takes an unfolded
+ value string and a dictionary as its arguments. The dictionary will
+ contain one key, 'defects', initialized to an empty list. After the call
+ the dictionary must contain two additional keys: parse_tree, set to the
+ parse tree obtained from parsing the header, and 'decoded', set to the
+ string value of the idealized representation of the data from the value.
+ (That is, encoded words are decoded, and values that have canonical
+ representations are so represented.)
+
+ The defects key is intended to collect parsing defects, which the message
+ parser will subsequently dispose of as appropriate. The parser should not,
+ insofar as practical, raise any errors. Defects should be added to the
+ list instead. The standard header parsers register defects for RFC
+ compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
+ errors.
+
+ The parse method may add additional keys to the dictionary. In this case
+ the subclass must define an 'init' method, which will be passed the
+ dictionary as its keyword arguments. The method should use (usually by
+ setting them as the value of similarly named attributes) and remove all the
+ extra keys added by its parse method, and then use super to call its parent
+ class with the remaining arguments and keywords.
+
+ The subclass should also make sure that a 'max_count' attribute is defined
+ that is either None or 1. XXX: need to better define this API.
+
+ """
+
+ def __new__(cls, name, value):
+ kwds = {'defects': []}
+ cls.parse(value, kwds)
+ if utils._has_surrogates(kwds['decoded']):
+ kwds['decoded'] = utils._sanitize(kwds['decoded'])
+ self = str.__new__(cls, kwds['decoded'])
+ del kwds['decoded']
+ self.init(name, **kwds)
+ return self
+
+ def init(self, name, *, parse_tree, defects):
+ self._name = name
+ self._parse_tree = parse_tree
+ self._defects = defects
+
+ @property
+ def name(self):
+ return self._name
+
+ @property
+ def defects(self):
+ return tuple(self._defects)
+
+ def __reduce__(self):
+ return (
+ _reconstruct_header,
+ (
+ self.__class__.__name__,
+ self.__class__.__bases__,
+ str(self),
+ ),
+ self.__dict__)
+
+ @classmethod
+ def _reconstruct(cls, value):
+ return str.__new__(cls, value)
+
+ def fold(self, *, policy):
+ """Fold header according to policy.
+
+ The parsed representation of the header is folded according to
+ RFC5322 rules, as modified by the policy. If the parse tree
+ contains surrogateescaped bytes, the bytes are CTE encoded using
+ the charset 'unknown-8bit".
+
+ Any non-ASCII characters in the parse tree are CTE encoded using
+ charset utf-8. XXX: make this a policy setting.
+
+ The returned value is an ASCII-only string possibly containing linesep
+ characters, and ending with a linesep character. The string includes
+ the header name and the ': ' separator.
+
+ """
+ # At some point we need to put fws here if it was in the source.
+ header = parser.Header([
+ parser.HeaderLabel([
+ parser.ValueTerminal(self.name, 'header-name'),
+ parser.ValueTerminal(':', 'header-sep')]),
+ ])
+ if self._parse_tree:
+ header.append(
+ parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]))
+ header.append(self._parse_tree)
+ return header.fold(policy=policy)
+
+
+def _reconstruct_header(cls_name, bases, value):
+ return type(cls_name, bases, {})._reconstruct(value)
+
+
+class UnstructuredHeader:
+
+ max_count = None
+ value_parser = staticmethod(parser.get_unstructured)
+
+ @classmethod
+ def parse(cls, value, kwds):
+ kwds['parse_tree'] = cls.value_parser(value)
+ kwds['decoded'] = str(kwds['parse_tree'])
+
+
+class UniqueUnstructuredHeader(UnstructuredHeader):
+
+ max_count = 1
+
+
+class DateHeader:
+
+ """Header whose value consists of a single timestamp.
+
+ Provides an additional attribute, datetime, which is either an aware
+ datetime using a timezone, or a naive datetime if the timezone
+ in the input string is -0000. Also accepts a datetime as input.
+ The 'value' attribute is the normalized form of the timestamp,
+ which means it is the output of format_datetime on the datetime.
+ """
+
+ max_count = None
+
+ # This is used only for folding, not for creating 'decoded'.
+ value_parser = staticmethod(parser.get_unstructured)
+
+ @classmethod
+ def parse(cls, value, kwds):
+ if not value:
+ kwds['defects'].append(errors.HeaderMissingRequiredValue())
+ kwds['datetime'] = None
+ kwds['decoded'] = ''
+ kwds['parse_tree'] = parser.TokenList()
+ return
+ if isinstance(value, str):
+ value = utils.parsedate_to_datetime(value)
+ kwds['datetime'] = value
+ kwds['decoded'] = utils.format_datetime(kwds['datetime'])
+ kwds['parse_tree'] = cls.value_parser(kwds['decoded'])
+
+ def init(self, *args, **kw):
+ self._datetime = kw.pop('datetime')
+ super().init(*args, **kw)
+
+ @property
+ def datetime(self):
+ return self._datetime
+
+
+class UniqueDateHeader(DateHeader):
+
+ max_count = 1
+
+
+class AddressHeader:
+
+ max_count = None
+
+ @staticmethod
+ def value_parser(value):
+ address_list, value = parser.get_address_list(value)
+ assert not value, 'this should not happen'
+ return address_list
+
+ @classmethod
+ def parse(cls, value, kwds):
+ if isinstance(value, str):
+ # We are translating here from the RFC language (address/mailbox)
+ # to our API language (group/address).
+ kwds['parse_tree'] = address_list = cls.value_parser(value)
+ groups = []
+ for addr in address_list.addresses:
+ groups.append(Group(addr.display_name,
+ [Address(mb.display_name or '',
+ mb.local_part or '',
+ mb.domain or '')
+ for mb in addr.all_mailboxes]))
+ defects = list(address_list.all_defects)
+ else:
+ # Assume it is Address/Group stuff
+ if not hasattr(value, '__iter__'):
+ value = [value]
+ groups = [Group(None, [item]) if not hasattr(item, 'addresses')
+ else item
+ for item in value]
+ defects = []
+ kwds['groups'] = groups
+ kwds['defects'] = defects
+ kwds['decoded'] = ', '.join([str(item) for item in groups])
+ if 'parse_tree' not in kwds:
+ kwds['parse_tree'] = cls.value_parser(kwds['decoded'])
+
+ def init(self, *args, **kw):
+ self._groups = tuple(kw.pop('groups'))
+ self._addresses = None
+ super().init(*args, **kw)
+
+ @property
+ def groups(self):
+ return self._groups
+
+ @property
+ def addresses(self):
+ if self._addresses is None:
+ self._addresses = tuple(address for group in self._groups
+ for address in group.addresses)
+ return self._addresses
+
+
+class UniqueAddressHeader(AddressHeader):
+
+ max_count = 1
+
+
+class SingleAddressHeader(AddressHeader):
+
+ @property
+ def address(self):
+ if len(self.addresses)!=1:
+ raise ValueError(("value of single address header {} is not "
+ "a single address").format(self.name))
+ return self.addresses[0]
+
+
+class UniqueSingleAddressHeader(SingleAddressHeader):
+
+ max_count = 1
+
+
+class MIMEVersionHeader:
+
+ max_count = 1
+
+ value_parser = staticmethod(parser.parse_mime_version)
+
+ @classmethod
+ def parse(cls, value, kwds):
+ kwds['parse_tree'] = parse_tree = cls.value_parser(value)
+ kwds['decoded'] = str(parse_tree)
+ kwds['defects'].extend(parse_tree.all_defects)
+ kwds['major'] = None if parse_tree.minor is None else parse_tree.major
+ kwds['minor'] = parse_tree.minor
+ if parse_tree.minor is not None:
+ kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])
+ else:
+ kwds['version'] = None
+
+ def init(self, *args, **kw):
+ self._version = kw.pop('version')
+ self._major = kw.pop('major')
+ self._minor = kw.pop('minor')
+ super().init(*args, **kw)
+
+ @property
+ def major(self):
+ return self._major
+
+ @property
+ def minor(self):
+ return self._minor
+
+ @property
+ def version(self):
+ return self._version
+
+
+class ParameterizedMIMEHeader:
+
+ # Mixin that handles the params dict. Must be subclassed and
+ # a property value_parser for the specific header provided.
+
+ max_count = 1
+
+ @classmethod
+ def parse(cls, value, kwds):
+ kwds['parse_tree'] = parse_tree = cls.value_parser(value)
+ kwds['decoded'] = str(parse_tree)
+ kwds['defects'].extend(parse_tree.all_defects)
+ if parse_tree.params is None:
+ kwds['params'] = {}
+ else:
+ # The MIME RFCs specify that parameter ordering is arbitrary.
+ kwds['params'] = {utils._sanitize(name).lower():
+ utils._sanitize(value)
+ for name, value in parse_tree.params}
+
+ def init(self, *args, **kw):
+ self._params = kw.pop('params')
+ super().init(*args, **kw)
+
+ @property
+ def params(self):
+ return MappingProxyType(self._params)
+
+
+class ContentTypeHeader(ParameterizedMIMEHeader):
+
+ value_parser = staticmethod(parser.parse_content_type_header)
+
+ def init(self, *args, **kw):
+ super().init(*args, **kw)
+ self._maintype = utils._sanitize(self._parse_tree.maintype)
+ self._subtype = utils._sanitize(self._parse_tree.subtype)
+
+ @property
+ def maintype(self):
+ return self._maintype
+
+ @property
+ def subtype(self):
+ return self._subtype
+
+ @property
+ def content_type(self):
+ return self.maintype + '/' + self.subtype
+
+
+class ContentDispositionHeader(ParameterizedMIMEHeader):
+
+ value_parser = staticmethod(parser.parse_content_disposition_header)
+
+ def init(self, *args, **kw):
+ super().init(*args, **kw)
+ cd = self._parse_tree.content_disposition
+ self._content_disposition = cd if cd is None else utils._sanitize(cd)
+
+ @property
+ def content_disposition(self):
+ return self._content_disposition
+
+
+class ContentTransferEncodingHeader:
+
+ max_count = 1
+
+ value_parser = staticmethod(parser.parse_content_transfer_encoding_header)
+
+ @classmethod
+ def parse(cls, value, kwds):
+ kwds['parse_tree'] = parse_tree = cls.value_parser(value)
+ kwds['decoded'] = str(parse_tree)
+ kwds['defects'].extend(parse_tree.all_defects)
+
+ def init(self, *args, **kw):
+ super().init(*args, **kw)
+ self._cte = utils._sanitize(self._parse_tree.cte)
+
+ @property
+ def cte(self):
+ return self._cte
+
+
+class MessageIDHeader:
+
+ max_count = 1
+ value_parser = staticmethod(parser.parse_message_id)
+
+ @classmethod
+ def parse(cls, value, kwds):
+ kwds['parse_tree'] = parse_tree = cls.value_parser(value)
+ kwds['decoded'] = str(parse_tree)
+ kwds['defects'].extend(parse_tree.all_defects)
+
+
+# The header factory #
+
+_default_header_map = {
+ 'subject': UniqueUnstructuredHeader,
+ 'date': UniqueDateHeader,
+ 'resent-date': DateHeader,
+ 'orig-date': UniqueDateHeader,
+ 'sender': UniqueSingleAddressHeader,
+ 'resent-sender': SingleAddressHeader,
+ 'to': UniqueAddressHeader,
+ 'resent-to': AddressHeader,
+ 'cc': UniqueAddressHeader,
+ 'resent-cc': AddressHeader,
+ 'bcc': UniqueAddressHeader,
+ 'resent-bcc': AddressHeader,
+ 'from': UniqueAddressHeader,
+ 'resent-from': AddressHeader,
+ 'reply-to': UniqueAddressHeader,
+ 'mime-version': MIMEVersionHeader,
+ 'content-type': ContentTypeHeader,
+ 'content-disposition': ContentDispositionHeader,
+ 'content-transfer-encoding': ContentTransferEncodingHeader,
+ 'message-id': MessageIDHeader,
+ }
+
+class HeaderRegistry:
+
+ """A header_factory and header registry."""
+
+ def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
+ use_default_map=True):
+ """Create a header_factory that works with the Policy API.
+
+ base_class is the class that will be the last class in the created
+ header class's __bases__ list. default_class is the class that will be
+ used if "name" (see __call__) does not appear in the registry.
+ use_default_map controls whether or not the default mapping of names to
+ specialized classes is copied in to the registry when the factory is
+ created. The default is True.
+
+ """
+ self.registry = {}
+ self.base_class = base_class
+ self.default_class = default_class
+ if use_default_map:
+ self.registry.update(_default_header_map)
+
+ def map_to_type(self, name, cls):
+ """Register cls as the specialized class for handling "name" headers.
+
+ """
+ self.registry[name.lower()] = cls
+
+ def __getitem__(self, name):
+ cls = self.registry.get(name.lower(), self.default_class)
+ return type('_'+cls.__name__, (cls, self.base_class), {})
+
+ def __call__(self, name, value):
+ """Create a header instance for header 'name' from 'value'.
+
+ Creates a header instance by creating a specialized class for parsing
+ and representing the specified header by combining the factory
+ base_class with a specialized class from the registry or the
+ default_class, and passing the name and value to the constructed
+ class's constructor.
+
+ """
+ return self[name](name, value)
diff --git a/dist/lib/email/iterators.py b/dist/lib/email/iterators.py
new file mode 100644
index 0000000..b5502ee
--- /dev/null
+++ b/dist/lib/email/iterators.py
@@ -0,0 +1,71 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Various types of useful iterators and generators."""
+
+__all__ = [
+ 'body_line_iterator',
+ 'typed_subpart_iterator',
+ 'walk',
+ # Do not include _structure() since it's part of the debugging API.
+ ]
+
+import sys
+from io import StringIO
+
+
+
+# This function will become a method of the Message class
+def walk(self):
+ """Walk over the message tree, yielding each subpart.
+
+ The walk is performed in depth-first order. This method is a
+ generator.
+ """
+ yield self
+ if self.is_multipart():
+ for subpart in self.get_payload():
+ yield from subpart.walk()
+
+
+
+# These two functions are imported into the Iterators.py interface module.
+def body_line_iterator(msg, decode=False):
+ """Iterate over the parts, returning string payloads line-by-line.
+
+ Optional decode (default False) is passed through to .get_payload().
+ """
+ for subpart in msg.walk():
+ payload = subpart.get_payload(decode=decode)
+ if isinstance(payload, str):
+ yield from StringIO(payload)
+
+
+def typed_subpart_iterator(msg, maintype='text', subtype=None):
+ """Iterate over the subparts with a given MIME type.
+
+ Use `maintype' as the main MIME type to match against; this defaults to
+ "text". Optional `subtype' is the MIME subtype to match against; if
+ omitted, only the main type is matched.
+ """
+ for subpart in msg.walk():
+ if subpart.get_content_maintype() == maintype:
+ if subtype is None or subpart.get_content_subtype() == subtype:
+ yield subpart
+
+
+
+def _structure(msg, fp=None, level=0, include_default=False):
+ """A handy debugging aid"""
+ if fp is None:
+ fp = sys.stdout
+ tab = ' ' * (level * 4)
+ print(tab + msg.get_content_type(), end='', file=fp)
+ if include_default:
+ print(' [%s]' % msg.get_default_type(), file=fp)
+ else:
+ print(file=fp)
+ if msg.is_multipart():
+ for subpart in msg.get_payload():
+ _structure(subpart, fp, level+1, include_default)
diff --git a/dist/lib/email/message.py b/dist/lib/email/message.py
new file mode 100644
index 0000000..1262602
--- /dev/null
+++ b/dist/lib/email/message.py
@@ -0,0 +1,1173 @@
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Basic message object for the email package object model."""
+
+__all__ = ['Message', 'EmailMessage']
+
+import re
+import uu
+import quopri
+from io import BytesIO, StringIO
+
+# Intrapackage imports
+from email import utils
+from email import errors
+from email._policybase import Policy, compat32
+from email import charset as _charset
+from email._encoded_words import decode_b
+Charset = _charset.Charset
+
+SEMISPACE = '; '
+
+# Regular expression that matches `special' characters in parameters, the
+# existence of which force quoting of the parameter value.
+tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
+
+
+def _splitparam(param):
+ # Split header parameters. BAW: this may be too simple. It isn't
+ # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
+ # found in the wild. We may eventually need a full fledged parser.
+ # RDM: we might have a Header here; for now just stringify it.
+ a, sep, b = str(param).partition(';')
+ if not sep:
+ return a.strip(), None
+ return a.strip(), b.strip()
+
+def _formatparam(param, value=None, quote=True):
+ """Convenience function to format and return a key=value pair.
+
+ This will quote the value if needed or if quote is true. If value is a
+ three tuple (charset, language, value), it will be encoded according
+ to RFC2231 rules. If it contains non-ascii characters it will likewise
+ be encoded according to RFC2231 rules, using the utf-8 charset and
+ a null language.
+ """
+ if value is not None and len(value) > 0:
+ # A tuple is used for RFC 2231 encoded parameter values where items
+ # are (charset, language, value). charset is a string, not a Charset
+ # instance. RFC 2231 encoded values are never quoted, per RFC.
+ if isinstance(value, tuple):
+ # Encode as per RFC 2231
+ param += '*'
+ value = utils.encode_rfc2231(value[2], value[0], value[1])
+ return '%s=%s' % (param, value)
+ else:
+ try:
+ value.encode('ascii')
+ except UnicodeEncodeError:
+ param += '*'
+ value = utils.encode_rfc2231(value, 'utf-8', '')
+ return '%s=%s' % (param, value)
+ # BAW: Please check this. I think that if quote is set it should
+ # force quoting even if not necessary.
+ if quote or tspecials.search(value):
+ return '%s="%s"' % (param, utils.quote(value))
+ else:
+ return '%s=%s' % (param, value)
+ else:
+ return param
+
+def _parseparam(s):
+ # RDM This might be a Header, so for now stringify it.
+ s = ';' + str(s)
+ plist = []
+ while s[:1] == ';':
+ s = s[1:]
+ end = s.find(';')
+ while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
+ end = s.find(';', end + 1)
+ if end < 0:
+ end = len(s)
+ f = s[:end]
+ if '=' in f:
+ i = f.index('=')
+ f = f[:i].strip().lower() + '=' + f[i+1:].strip()
+ plist.append(f.strip())
+ s = s[end:]
+ return plist
+
+
+def _unquotevalue(value):
+ # This is different than utils.collapse_rfc2231_value() because it doesn't
+ # try to convert the value to a unicode. Message.get_param() and
+ # Message.get_params() are both currently defined to return the tuple in
+ # the face of RFC 2231 parameters.
+ if isinstance(value, tuple):
+ return value[0], value[1], utils.unquote(value[2])
+ else:
+ return utils.unquote(value)
+
+
+
+class Message:
+ """Basic message object.
+
+ A message object is defined as something that has a bunch of RFC 2822
+ headers and a payload. It may optionally have an envelope header
+ (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
+ multipart or a message/rfc822), then the payload is a list of Message
+ objects, otherwise it is a string.
+
+ Message objects implement part of the `mapping' interface, which assumes
+ there is exactly one occurrence of the header per message. Some headers
+ do in fact appear multiple times (e.g. Received) and for those headers,
+ you must use the explicit API to set or get all the headers. Not all of
+ the mapping methods are implemented.
+ """
+ def __init__(self, policy=compat32):
+ self.policy = policy
+ self._headers = []
+ self._unixfrom = None
+ self._payload = None
+ self._charset = None
+ # Defaults for multipart messages
+ self.preamble = self.epilogue = None
+ self.defects = []
+ # Default content type
+ self._default_type = 'text/plain'
+
+ def __str__(self):
+ """Return the entire formatted message as a string.
+ """
+ return self.as_string()
+
+ def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
+ """Return the entire formatted message as a string.
+
+ Optional 'unixfrom', when true, means include the Unix From_ envelope
+ header. For backward compatibility reasons, if maxheaderlen is
+ not specified it defaults to 0, so you must override it explicitly
+ if you want a different maxheaderlen. 'policy' is passed to the
+ Generator instance used to serialize the mesasge; if it is not
+ specified the policy associated with the message instance is used.
+
+ If the message object contains binary data that is not encoded
+ according to RFC standards, the non-compliant data will be replaced by
+ unicode "unknown character" code points.
+ """
+ from email.generator import Generator
+ policy = self.policy if policy is None else policy
+ fp = StringIO()
+ g = Generator(fp,
+ mangle_from_=False,
+ maxheaderlen=maxheaderlen,
+ policy=policy)
+ g.flatten(self, unixfrom=unixfrom)
+ return fp.getvalue()
+
+ def __bytes__(self):
+ """Return the entire formatted message as a bytes object.
+ """
+ return self.as_bytes()
+
+ def as_bytes(self, unixfrom=False, policy=None):
+ """Return the entire formatted message as a bytes object.
+
+ Optional 'unixfrom', when true, means include the Unix From_ envelope
+ header. 'policy' is passed to the BytesGenerator instance used to
+ serialize the message; if not specified the policy associated with
+ the message instance is used.
+ """
+ from email.generator import BytesGenerator
+ policy = self.policy if policy is None else policy
+ fp = BytesIO()
+ g = BytesGenerator(fp, mangle_from_=False, policy=policy)
+ g.flatten(self, unixfrom=unixfrom)
+ return fp.getvalue()
+
+ def is_multipart(self):
+ """Return True if the message consists of multiple parts."""
+ return isinstance(self._payload, list)
+
+ #
+ # Unix From_ line
+ #
+ def set_unixfrom(self, unixfrom):
+ self._unixfrom = unixfrom
+
+ def get_unixfrom(self):
+ return self._unixfrom
+
+ #
+ # Payload manipulation.
+ #
+ def attach(self, payload):
+ """Add the given payload to the current payload.
+
+ The current payload will always be a list of objects after this method
+ is called. If you want to set the payload to a scalar object, use
+ set_payload() instead.
+ """
+ if self._payload is None:
+ self._payload = [payload]
+ else:
+ try:
+ self._payload.append(payload)
+ except AttributeError:
+ raise TypeError("Attach is not valid on a message with a"
+ " non-multipart payload")
+
+ def get_payload(self, i=None, decode=False):
+ """Return a reference to the payload.
+
+ The payload will either be a list object or a string. If you mutate
+ the list object, you modify the message's payload in place. Optional
+ i returns that index into the payload.
+
+ Optional decode is a flag indicating whether the payload should be
+ decoded or not, according to the Content-Transfer-Encoding header
+ (default is False).
+
+ When True and the message is not a multipart, the payload will be
+ decoded if this header's value is `quoted-printable' or `base64'. If
+ some other encoding is used, or the header is missing, or if the
+ payload has bogus data (i.e. bogus base64 or uuencoded data), the
+ payload is returned as-is.
+
+ If the message is a multipart and the decode flag is True, then None
+ is returned.
+ """
+ # Here is the logic table for this code, based on the email5.0.0 code:
+ # i decode is_multipart result
+ # ------ ------ ------------ ------------------------------
+ # None True True None
+ # i True True None
+ # None False True _payload (a list)
+ # i False True _payload element i (a Message)
+ # i False False error (not a list)
+ # i True False error (not a list)
+ # None False False _payload
+ # None True False _payload decoded (bytes)
+ # Note that Barry planned to factor out the 'decode' case, but that
+ # isn't so easy now that we handle the 8 bit data, which needs to be
+ # converted in both the decode and non-decode path.
+ if self.is_multipart():
+ if decode:
+ return None
+ if i is None:
+ return self._payload
+ else:
+ return self._payload[i]
+ # For backward compatibility, Use isinstance and this error message
+ # instead of the more logical is_multipart test.
+ if i is not None and not isinstance(self._payload, list):
+ raise TypeError('Expected list, got %s' % type(self._payload))
+ payload = self._payload
+ # cte might be a Header, so for now stringify it.
+ cte = str(self.get('content-transfer-encoding', '')).lower()
+ # payload may be bytes here.
+ if isinstance(payload, str):
+ if utils._has_surrogates(payload):
+ bpayload = payload.encode('ascii', 'surrogateescape')
+ if not decode:
+ try:
+ payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
+ except LookupError:
+ payload = bpayload.decode('ascii', 'replace')
+ elif decode:
+ try:
+ bpayload = payload.encode('ascii')
+ except UnicodeError:
+ # This won't happen for RFC compliant messages (messages
+ # containing only ASCII code points in the unicode input).
+ # If it does happen, turn the string into bytes in a way
+ # guaranteed not to fail.
+ bpayload = payload.encode('raw-unicode-escape')
+ if not decode:
+ return payload
+ if cte == 'quoted-printable':
+ return quopri.decodestring(bpayload)
+ elif cte == 'base64':
+ # XXX: this is a bit of a hack; decode_b should probably be factored
+ # out somewhere, but I haven't figured out where yet.
+ value, defects = decode_b(b''.join(bpayload.splitlines()))
+ for defect in defects:
+ self.policy.handle_defect(self, defect)
+ return value
+ elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
+ in_file = BytesIO(bpayload)
+ out_file = BytesIO()
+ try:
+ uu.decode(in_file, out_file, quiet=True)
+ return out_file.getvalue()
+ except uu.Error:
+ # Some decoding problem
+ return bpayload
+ if isinstance(payload, str):
+ return bpayload
+ return payload
+
+ def set_payload(self, payload, charset=None):
+ """Set the payload to the given value.
+
+ Optional charset sets the message's default character set. See
+ set_charset() for details.
+ """
+ if hasattr(payload, 'encode'):
+ if charset is None:
+ self._payload = payload
+ return
+ if not isinstance(charset, Charset):
+ charset = Charset(charset)
+ payload = payload.encode(charset.output_charset)
+ if hasattr(payload, 'decode'):
+ self._payload = payload.decode('ascii', 'surrogateescape')
+ else:
+ self._payload = payload
+ if charset is not None:
+ self.set_charset(charset)
+
+ def set_charset(self, charset):
+ """Set the charset of the payload to a given character set.
+
+ charset can be a Charset instance, a string naming a character set, or
+ None. If it is a string it will be converted to a Charset instance.
+ If charset is None, the charset parameter will be removed from the
+ Content-Type field. Anything else will generate a TypeError.
+
+ The message will be assumed to be of type text/* encoded with
+ charset.input_charset. It will be converted to charset.output_charset
+ and encoded properly, if needed, when generating the plain text
+ representation of the message. MIME headers (MIME-Version,
+ Content-Type, Content-Transfer-Encoding) will be added as needed.
+ """
+ if charset is None:
+ self.del_param('charset')
+ self._charset = None
+ return
+ if not isinstance(charset, Charset):
+ charset = Charset(charset)
+ self._charset = charset
+ if 'MIME-Version' not in self:
+ self.add_header('MIME-Version', '1.0')
+ if 'Content-Type' not in self:
+ self.add_header('Content-Type', 'text/plain',
+ charset=charset.get_output_charset())
+ else:
+ self.set_param('charset', charset.get_output_charset())
+ if charset != charset.get_output_charset():
+ self._payload = charset.body_encode(self._payload)
+ if 'Content-Transfer-Encoding' not in self:
+ cte = charset.get_body_encoding()
+ try:
+ cte(self)
+ except TypeError:
+ # This 'if' is for backward compatibility, it allows unicode
+ # through even though that won't work correctly if the
+ # message is serialized.
+ payload = self._payload
+ if payload:
+ try:
+ payload = payload.encode('ascii', 'surrogateescape')
+ except UnicodeError:
+ payload = payload.encode(charset.output_charset)
+ self._payload = charset.body_encode(payload)
+ self.add_header('Content-Transfer-Encoding', cte)
+
+ def get_charset(self):
+ """Return the Charset instance associated with the message's payload.
+ """
+ return self._charset
+
+ #
+ # MAPPING INTERFACE (partial)
+ #
+ def __len__(self):
+ """Return the total number of headers, including duplicates."""
+ return len(self._headers)
+
+ def __getitem__(self, name):
+ """Get a header value.
+
+ Return None if the header is missing instead of raising an exception.
+
+ Note that if the header appeared multiple times, exactly which
+ occurrence gets returned is undefined. Use get_all() to get all
+ the values matching a header field name.
+ """
+ return self.get(name)
+
+ def __setitem__(self, name, val):
+ """Set the value of a header.
+
+ Note: this does not overwrite an existing header with the same field
+ name. Use __delitem__() first to delete any existing headers.
+ """
+ max_count = self.policy.header_max_count(name)
+ if max_count:
+ lname = name.lower()
+ found = 0
+ for k, v in self._headers:
+ if k.lower() == lname:
+ found += 1
+ if found >= max_count:
+ raise ValueError("There may be at most {} {} headers "
+ "in a message".format(max_count, name))
+ self._headers.append(self.policy.header_store_parse(name, val))
+
+ def __delitem__(self, name):
+ """Delete all occurrences of a header, if present.
+
+ Does not raise an exception if the header is missing.
+ """
+ name = name.lower()
+ newheaders = []
+ for k, v in self._headers:
+ if k.lower() != name:
+ newheaders.append((k, v))
+ self._headers = newheaders
+
+ def __contains__(self, name):
+ return name.lower() in [k.lower() for k, v in self._headers]
+
+ def __iter__(self):
+ for field, value in self._headers:
+ yield field
+
+ def keys(self):
+ """Return a list of all the message's header field names.
+
+ These will be sorted in the order they appeared in the original
+ message, or were added to the message, and may contain duplicates.
+ Any fields deleted and re-inserted are always appended to the header
+ list.
+ """
+ return [k for k, v in self._headers]
+
+ def values(self):
+ """Return a list of all the message's header values.
+
+ These will be sorted in the order they appeared in the original
+ message, or were added to the message, and may contain duplicates.
+ Any fields deleted and re-inserted are always appended to the header
+ list.
+ """
+ return [self.policy.header_fetch_parse(k, v)
+ for k, v in self._headers]
+
+ def items(self):
+ """Get all the message's header fields and values.
+
+ These will be sorted in the order they appeared in the original
+ message, or were added to the message, and may contain duplicates.
+ Any fields deleted and re-inserted are always appended to the header
+ list.
+ """
+ return [(k, self.policy.header_fetch_parse(k, v))
+ for k, v in self._headers]
+
+ def get(self, name, failobj=None):
+ """Get a header value.
+
+ Like __getitem__() but return failobj instead of None when the field
+ is missing.
+ """
+ name = name.lower()
+ for k, v in self._headers:
+ if k.lower() == name:
+ return self.policy.header_fetch_parse(k, v)
+ return failobj
+
+ #
+ # "Internal" methods (public API, but only intended for use by a parser
+ # or generator, not normal application code.
+ #
+
+ def set_raw(self, name, value):
+ """Store name and value in the model without modification.
+
+ This is an "internal" API, intended only for use by a parser.
+ """
+ self._headers.append((name, value))
+
+ def raw_items(self):
+ """Return the (name, value) header pairs without modification.
+
+ This is an "internal" API, intended only for use by a generator.
+ """
+ return iter(self._headers.copy())
+
+ #
+ # Additional useful stuff
+ #
+
+ def get_all(self, name, failobj=None):
+ """Return a list of all the values for the named field.
+
+ These will be sorted in the order they appeared in the original
+ message, and may contain duplicates. Any fields deleted and
+ re-inserted are always appended to the header list.
+
+ If no such fields exist, failobj is returned (defaults to None).
+ """
+ values = []
+ name = name.lower()
+ for k, v in self._headers:
+ if k.lower() == name:
+ values.append(self.policy.header_fetch_parse(k, v))
+ if not values:
+ return failobj
+ return values
+
+ def add_header(self, _name, _value, **_params):
+ """Extended header setting.
+
+ name is the header field to add. keyword arguments can be used to set
+ additional parameters for the header field, with underscores converted
+ to dashes. Normally the parameter will be added as key="value" unless
+ value is None, in which case only the key will be added. If a
+ parameter value contains non-ASCII characters it can be specified as a
+ three-tuple of (charset, language, value), in which case it will be
+ encoded according to RFC2231 rules. Otherwise it will be encoded using
+ the utf-8 charset and a language of ''.
+
+ Examples:
+
+ msg.add_header('content-disposition', 'attachment', filename='bud.gif')
+ msg.add_header('content-disposition', 'attachment',
+ filename=('utf-8', '', Fußballer.ppt'))
+ msg.add_header('content-disposition', 'attachment',
+ filename='Fußballer.ppt'))
+ """
+ parts = []
+ for k, v in _params.items():
+ if v is None:
+ parts.append(k.replace('_', '-'))
+ else:
+ parts.append(_formatparam(k.replace('_', '-'), v))
+ if _value is not None:
+ parts.insert(0, _value)
+ self[_name] = SEMISPACE.join(parts)
+
+ def replace_header(self, _name, _value):
+ """Replace a header.
+
+ Replace the first matching header found in the message, retaining
+ header order and case. If no matching header was found, a KeyError is
+ raised.
+ """
+ _name = _name.lower()
+ for i, (k, v) in zip(range(len(self._headers)), self._headers):
+ if k.lower() == _name:
+ self._headers[i] = self.policy.header_store_parse(k, _value)
+ break
+ else:
+ raise KeyError(_name)
+
+ #
+ # Use these three methods instead of the three above.
+ #
+
+ def get_content_type(self):
+ """Return the message's content type.
+
+ The returned string is coerced to lower case of the form
+ `maintype/subtype'. If there was no Content-Type header in the
+ message, the default type as given by get_default_type() will be
+ returned. Since according to RFC 2045, messages always have a default
+ type this will always return a value.
+
+ RFC 2045 defines a message's default type to be text/plain unless it
+ appears inside a multipart/digest container, in which case it would be
+ message/rfc822.
+ """
+ missing = object()
+ value = self.get('content-type', missing)
+ if value is missing:
+ # This should have no parameters
+ return self.get_default_type()
+ ctype = _splitparam(value)[0].lower()
+ # RFC 2045, section 5.2 says if its invalid, use text/plain
+ if ctype.count('/') != 1:
+ return 'text/plain'
+ return ctype
+
+ def get_content_maintype(self):
+ """Return the message's main content type.
+
+ This is the `maintype' part of the string returned by
+ get_content_type().
+ """
+ ctype = self.get_content_type()
+ return ctype.split('/')[0]
+
+ def get_content_subtype(self):
+ """Returns the message's sub-content type.
+
+ This is the `subtype' part of the string returned by
+ get_content_type().
+ """
+ ctype = self.get_content_type()
+ return ctype.split('/')[1]
+
+ def get_default_type(self):
+ """Return the `default' content type.
+
+ Most messages have a default content type of text/plain, except for
+ messages that are subparts of multipart/digest containers. Such
+ subparts have a default content type of message/rfc822.
+ """
+ return self._default_type
+
+ def set_default_type(self, ctype):
+ """Set the `default' content type.
+
+ ctype should be either "text/plain" or "message/rfc822", although this
+ is not enforced. The default content type is not stored in the
+ Content-Type header.
+ """
+ self._default_type = ctype
+
+ def _get_params_preserve(self, failobj, header):
+ # Like get_params() but preserves the quoting of values. BAW:
+ # should this be part of the public interface?
+ missing = object()
+ value = self.get(header, missing)
+ if value is missing:
+ return failobj
+ params = []
+ for p in _parseparam(value):
+ try:
+ name, val = p.split('=', 1)
+ name = name.strip()
+ val = val.strip()
+ except ValueError:
+ # Must have been a bare attribute
+ name = p.strip()
+ val = ''
+ params.append((name, val))
+ params = utils.decode_params(params)
+ return params
+
+ def get_params(self, failobj=None, header='content-type', unquote=True):
+ """Return the message's Content-Type parameters, as a list.
+
+ The elements of the returned list are 2-tuples of key/value pairs, as
+ split on the `=' sign. The left hand side of the `=' is the key,
+ while the right hand side is the value. If there is no `=' sign in
+ the parameter the value is the empty string. The value is as
+ described in the get_param() method.
+
+ Optional failobj is the object to return if there is no Content-Type
+ header. Optional header is the header to search instead of
+ Content-Type. If unquote is True, the value is unquoted.
+ """
+ missing = object()
+ params = self._get_params_preserve(missing, header)
+ if params is missing:
+ return failobj
+ if unquote:
+ return [(k, _unquotevalue(v)) for k, v in params]
+ else:
+ return params
+
+ def get_param(self, param, failobj=None, header='content-type',
+ unquote=True):
+ """Return the parameter value if found in the Content-Type header.
+
+ Optional failobj is the object to return if there is no Content-Type
+ header, or the Content-Type header has no such parameter. Optional
+ header is the header to search instead of Content-Type.
+
+ Parameter keys are always compared case insensitively. The return
+ value can either be a string, or a 3-tuple if the parameter was RFC
+ 2231 encoded. When it's a 3-tuple, the elements of the value are of
+ the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
+ LANGUAGE can be None, in which case you should consider VALUE to be
+ encoded in the us-ascii charset. You can usually ignore LANGUAGE.
+ The parameter value (either the returned string, or the VALUE item in
+ the 3-tuple) is always unquoted, unless unquote is set to False.
+
+ If your application doesn't care whether the parameter was RFC 2231
+ encoded, it can turn the return value into a string as follows:
+
+ rawparam = msg.get_param('foo')
+ param = email.utils.collapse_rfc2231_value(rawparam)
+
+ """
+ if header not in self:
+ return failobj
+ for k, v in self._get_params_preserve(failobj, header):
+ if k.lower() == param.lower():
+ if unquote:
+ return _unquotevalue(v)
+ else:
+ return v
+ return failobj
+
+ def set_param(self, param, value, header='Content-Type', requote=True,
+ charset=None, language='', replace=False):
+ """Set a parameter in the Content-Type header.
+
+ If the parameter already exists in the header, its value will be
+ replaced with the new value.
+
+ If header is Content-Type and has not yet been defined for this
+ message, it will be set to "text/plain" and the new parameter and
+ value will be appended as per RFC 2045.
+
+ An alternate header can be specified in the header argument, and all
+ parameters will be quoted as necessary unless requote is False.
+
+ If charset is specified, the parameter will be encoded according to RFC
+ 2231. Optional language specifies the RFC 2231 language, defaulting
+ to the empty string. Both charset and language should be strings.
+ """
+ if not isinstance(value, tuple) and charset:
+ value = (charset, language, value)
+
+ if header not in self and header.lower() == 'content-type':
+ ctype = 'text/plain'
+ else:
+ ctype = self.get(header)
+ if not self.get_param(param, header=header):
+ if not ctype:
+ ctype = _formatparam(param, value, requote)
+ else:
+ ctype = SEMISPACE.join(
+ [ctype, _formatparam(param, value, requote)])
+ else:
+ ctype = ''
+ for old_param, old_value in self.get_params(header=header,
+ unquote=requote):
+ append_param = ''
+ if old_param.lower() == param.lower():
+ append_param = _formatparam(param, value, requote)
+ else:
+ append_param = _formatparam(old_param, old_value, requote)
+ if not ctype:
+ ctype = append_param
+ else:
+ ctype = SEMISPACE.join([ctype, append_param])
+ if ctype != self.get(header):
+ if replace:
+ self.replace_header(header, ctype)
+ else:
+ del self[header]
+ self[header] = ctype
+
+ def del_param(self, param, header='content-type', requote=True):
+ """Remove the given parameter completely from the Content-Type header.
+
+ The header will be re-written in place without the parameter or its
+ value. All values will be quoted as necessary unless requote is
+ False. Optional header specifies an alternative to the Content-Type
+ header.
+ """
+ if header not in self:
+ return
+ new_ctype = ''
+ for p, v in self.get_params(header=header, unquote=requote):
+ if p.lower() != param.lower():
+ if not new_ctype:
+ new_ctype = _formatparam(p, v, requote)
+ else:
+ new_ctype = SEMISPACE.join([new_ctype,
+ _formatparam(p, v, requote)])
+ if new_ctype != self.get(header):
+ del self[header]
+ self[header] = new_ctype
+
+ def set_type(self, type, header='Content-Type', requote=True):
+ """Set the main type and subtype for the Content-Type header.
+
+ type must be a string in the form "maintype/subtype", otherwise a
+ ValueError is raised.
+
+ This method replaces the Content-Type header, keeping all the
+ parameters in place. If requote is False, this leaves the existing
+ header's quoting as is. Otherwise, the parameters will be quoted (the
+ default).
+
+ An alternative header can be specified in the header argument. When
+ the Content-Type header is set, we'll always also add a MIME-Version
+ header.
+ """
+ # BAW: should we be strict?
+ if not type.count('/') == 1:
+ raise ValueError
+ # Set the Content-Type, you get a MIME-Version
+ if header.lower() == 'content-type':
+ del self['mime-version']
+ self['MIME-Version'] = '1.0'
+ if header not in self:
+ self[header] = type
+ return
+ params = self.get_params(header=header, unquote=requote)
+ del self[header]
+ self[header] = type
+ # Skip the first param; it's the old type.
+ for p, v in params[1:]:
+ self.set_param(p, v, header, requote)
+
+ def get_filename(self, failobj=None):
+ """Return the filename associated with the payload if present.
+
+ The filename is extracted from the Content-Disposition header's
+ `filename' parameter, and it is unquoted. If that header is missing
+ the `filename' parameter, this method falls back to looking for the
+ `name' parameter.
+ """
+ missing = object()
+ filename = self.get_param('filename', missing, 'content-disposition')
+ if filename is missing:
+ filename = self.get_param('name', missing, 'content-type')
+ if filename is missing:
+ return failobj
+ return utils.collapse_rfc2231_value(filename).strip()
+
+ def get_boundary(self, failobj=None):
+ """Return the boundary associated with the payload if present.
+
+ The boundary is extracted from the Content-Type header's `boundary'
+ parameter, and it is unquoted.
+ """
+ missing = object()
+ boundary = self.get_param('boundary', missing)
+ if boundary is missing:
+ return failobj
+ # RFC 2046 says that boundaries may begin but not end in w/s
+ return utils.collapse_rfc2231_value(boundary).rstrip()
+
+ def set_boundary(self, boundary):
+ """Set the boundary parameter in Content-Type to 'boundary'.
+
+ This is subtly different than deleting the Content-Type header and
+ adding a new one with a new boundary parameter via add_header(). The
+ main difference is that using the set_boundary() method preserves the
+ order of the Content-Type header in the original message.
+
+ HeaderParseError is raised if the message has no Content-Type header.
+ """
+ missing = object()
+ params = self._get_params_preserve(missing, 'content-type')
+ if params is missing:
+ # There was no Content-Type header, and we don't know what type
+ # to set it to, so raise an exception.
+ raise errors.HeaderParseError('No Content-Type header found')
+ newparams = []
+ foundp = False
+ for pk, pv in params:
+ if pk.lower() == 'boundary':
+ newparams.append(('boundary', '"%s"' % boundary))
+ foundp = True
+ else:
+ newparams.append((pk, pv))
+ if not foundp:
+ # The original Content-Type header had no boundary attribute.
+ # Tack one on the end. BAW: should we raise an exception
+ # instead???
+ newparams.append(('boundary', '"%s"' % boundary))
+ # Replace the existing Content-Type header with the new value
+ newheaders = []
+ for h, v in self._headers:
+ if h.lower() == 'content-type':
+ parts = []
+ for k, v in newparams:
+ if v == '':
+ parts.append(k)
+ else:
+ parts.append('%s=%s' % (k, v))
+ val = SEMISPACE.join(parts)
+ newheaders.append(self.policy.header_store_parse(h, val))
+
+ else:
+ newheaders.append((h, v))
+ self._headers = newheaders
+
+ def get_content_charset(self, failobj=None):
+ """Return the charset parameter of the Content-Type header.
+
+ The returned string is always coerced to lower case. If there is no
+ Content-Type header, or if that header has no charset parameter,
+ failobj is returned.
+ """
+ missing = object()
+ charset = self.get_param('charset', missing)
+ if charset is missing:
+ return failobj
+ if isinstance(charset, tuple):
+ # RFC 2231 encoded, so decode it, and it better end up as ascii.
+ pcharset = charset[0] or 'us-ascii'
+ try:
+ # LookupError will be raised if the charset isn't known to
+ # Python. UnicodeError will be raised if the encoded text
+ # contains a character not in the charset.
+ as_bytes = charset[2].encode('raw-unicode-escape')
+ charset = str(as_bytes, pcharset)
+ except (LookupError, UnicodeError):
+ charset = charset[2]
+ # charset characters must be in us-ascii range
+ try:
+ charset.encode('us-ascii')
+ except UnicodeError:
+ return failobj
+ # RFC 2046, $4.1.2 says charsets are not case sensitive
+ return charset.lower()
+
+ def get_charsets(self, failobj=None):
+ """Return a list containing the charset(s) used in this message.
+
+ The returned list of items describes the Content-Type headers'
+ charset parameter for this message and all the subparts in its
+ payload.
+
+ Each item will either be a string (the value of the charset parameter
+ in the Content-Type header of that part) or the value of the
+ 'failobj' parameter (defaults to None), if the part does not have a
+ main MIME type of "text", or the charset is not defined.
+
+ The list will contain one string for each part of the message, plus
+ one for the container message (i.e. self), so that a non-multipart
+ message will still return a list of length 1.
+ """
+ return [part.get_content_charset(failobj) for part in self.walk()]
+
+ def get_content_disposition(self):
+ """Return the message's content-disposition if it exists, or None.
+
+ The return values can be either 'inline', 'attachment' or None
+ according to the rfc2183.
+ """
+ value = self.get('content-disposition')
+ if value is None:
+ return None
+ c_d = _splitparam(value)[0].lower()
+ return c_d
+
+ # I.e. def walk(self): ...
+ from email.iterators import walk
+
+
+class MIMEPart(Message):
+
+ def __init__(self, policy=None):
+ if policy is None:
+ from email.policy import default
+ policy = default
+ Message.__init__(self, policy)
+
+
+ def as_string(self, unixfrom=False, maxheaderlen=None, policy=None):
+ """Return the entire formatted message as a string.
+
+ Optional 'unixfrom', when true, means include the Unix From_ envelope
+ header. maxheaderlen is retained for backward compatibility with the
+ base Message class, but defaults to None, meaning that the policy value
+ for max_line_length controls the header maximum length. 'policy' is
+ passed to the Generator instance used to serialize the mesasge; if it
+ is not specified the policy associated with the message instance is
+ used.
+ """
+ policy = self.policy if policy is None else policy
+ if maxheaderlen is None:
+ maxheaderlen = policy.max_line_length
+ return super().as_string(maxheaderlen=maxheaderlen, policy=policy)
+
+ def __str__(self):
+ return self.as_string(policy=self.policy.clone(utf8=True))
+
+ def is_attachment(self):
+ c_d = self.get('content-disposition')
+ return False if c_d is None else c_d.content_disposition == 'attachment'
+
+ def _find_body(self, part, preferencelist):
+ if part.is_attachment():
+ return
+ maintype, subtype = part.get_content_type().split('/')
+ if maintype == 'text':
+ if subtype in preferencelist:
+ yield (preferencelist.index(subtype), part)
+ return
+ if maintype != 'multipart':
+ return
+ if subtype != 'related':
+ for subpart in part.iter_parts():
+ yield from self._find_body(subpart, preferencelist)
+ return
+ if 'related' in preferencelist:
+ yield (preferencelist.index('related'), part)
+ candidate = None
+ start = part.get_param('start')
+ if start:
+ for subpart in part.iter_parts():
+ if subpart['content-id'] == start:
+ candidate = subpart
+ break
+ if candidate is None:
+ subparts = part.get_payload()
+ candidate = subparts[0] if subparts else None
+ if candidate is not None:
+ yield from self._find_body(candidate, preferencelist)
+
+ def get_body(self, preferencelist=('related', 'html', 'plain')):
+ """Return best candidate mime part for display as 'body' of message.
+
+ Do a depth first search, starting with self, looking for the first part
+ matching each of the items in preferencelist, and return the part
+ corresponding to the first item that has a match, or None if no items
+ have a match. If 'related' is not included in preferencelist, consider
+ the root part of any multipart/related encountered as a candidate
+ match. Ignore parts with 'Content-Disposition: attachment'.
+ """
+ best_prio = len(preferencelist)
+ body = None
+ for prio, part in self._find_body(self, preferencelist):
+ if prio < best_prio:
+ best_prio = prio
+ body = part
+ if prio == 0:
+ break
+ return body
+
+ _body_types = {('text', 'plain'),
+ ('text', 'html'),
+ ('multipart', 'related'),
+ ('multipart', 'alternative')}
+ def iter_attachments(self):
+ """Return an iterator over the non-main parts of a multipart.
+
+ Skip the first of each occurrence of text/plain, text/html,
+ multipart/related, or multipart/alternative in the multipart (unless
+ they have a 'Content-Disposition: attachment' header) and include all
+ remaining subparts in the returned iterator. When applied to a
+ multipart/related, return all parts except the root part. Return an
+ empty iterator when applied to a multipart/alternative or a
+ non-multipart.
+ """
+ maintype, subtype = self.get_content_type().split('/')
+ if maintype != 'multipart' or subtype == 'alternative':
+ return
+ payload = self.get_payload()
+ # Certain malformed messages can have content type set to `multipart/*`
+ # but still have single part body, in which case payload.copy() can
+ # fail with AttributeError.
+ try:
+ parts = payload.copy()
+ except AttributeError:
+ # payload is not a list, it is most probably a string.
+ return
+
+ if maintype == 'multipart' and subtype == 'related':
+ # For related, we treat everything but the root as an attachment.
+ # The root may be indicated by 'start'; if there's no start or we
+ # can't find the named start, treat the first subpart as the root.
+ start = self.get_param('start')
+ if start:
+ found = False
+ attachments = []
+ for part in parts:
+ if part.get('content-id') == start:
+ found = True
+ else:
+ attachments.append(part)
+ if found:
+ yield from attachments
+ return
+ parts.pop(0)
+ yield from parts
+ return
+ # Otherwise we more or less invert the remaining logic in get_body.
+ # This only really works in edge cases (ex: non-text related or
+ # alternatives) if the sending agent sets content-disposition.
+ seen = [] # Only skip the first example of each candidate type.
+ for part in parts:
+ maintype, subtype = part.get_content_type().split('/')
+ if ((maintype, subtype) in self._body_types and
+ not part.is_attachment() and subtype not in seen):
+ seen.append(subtype)
+ continue
+ yield part
+
+ def iter_parts(self):
+ """Return an iterator over all immediate subparts of a multipart.
+
+ Return an empty iterator for a non-multipart.
+ """
+ if self.get_content_maintype() == 'multipart':
+ yield from self.get_payload()
+
+ def get_content(self, *args, content_manager=None, **kw):
+ if content_manager is None:
+ content_manager = self.policy.content_manager
+ return content_manager.get_content(self, *args, **kw)
+
+ def set_content(self, *args, content_manager=None, **kw):
+ if content_manager is None:
+ content_manager = self.policy.content_manager
+ content_manager.set_content(self, *args, **kw)
+
+ def _make_multipart(self, subtype, disallowed_subtypes, boundary):
+ if self.get_content_maintype() == 'multipart':
+ existing_subtype = self.get_content_subtype()
+ disallowed_subtypes = disallowed_subtypes + (subtype,)
+ if existing_subtype in disallowed_subtypes:
+ raise ValueError("Cannot convert {} to {}".format(
+ existing_subtype, subtype))
+ keep_headers = []
+ part_headers = []
+ for name, value in self._headers:
+ if name.lower().startswith('content-'):
+ part_headers.append((name, value))
+ else:
+ keep_headers.append((name, value))
+ if part_headers:
+ # There is existing content, move it to the first subpart.
+ part = type(self)(policy=self.policy)
+ part._headers = part_headers
+ part._payload = self._payload
+ self._payload = [part]
+ else:
+ self._payload = []
+ self._headers = keep_headers
+ self['Content-Type'] = 'multipart/' + subtype
+ if boundary is not None:
+ self.set_param('boundary', boundary)
+
+ def make_related(self, boundary=None):
+ self._make_multipart('related', ('alternative', 'mixed'), boundary)
+
+ def make_alternative(self, boundary=None):
+ self._make_multipart('alternative', ('mixed',), boundary)
+
+ def make_mixed(self, boundary=None):
+ self._make_multipart('mixed', (), boundary)
+
+ def _add_multipart(self, _subtype, *args, _disp=None, **kw):
+ if (self.get_content_maintype() != 'multipart' or
+ self.get_content_subtype() != _subtype):
+ getattr(self, 'make_' + _subtype)()
+ part = type(self)(policy=self.policy)
+ part.set_content(*args, **kw)
+ if _disp and 'content-disposition' not in part:
+ part['Content-Disposition'] = _disp
+ self.attach(part)
+
+ def add_related(self, *args, **kw):
+ self._add_multipart('related', *args, _disp='inline', **kw)
+
+ def add_alternative(self, *args, **kw):
+ self._add_multipart('alternative', *args, **kw)
+
+ def add_attachment(self, *args, **kw):
+ self._add_multipart('mixed', *args, _disp='attachment', **kw)
+
+ def clear(self):
+ self._headers = []
+ self._payload = None
+
+ def clear_content(self):
+ self._headers = [(n, v) for n, v in self._headers
+ if not n.lower().startswith('content-')]
+ self._payload = None
+
+
+class EmailMessage(MIMEPart):
+
+ def set_content(self, *args, **kw):
+ super().set_content(*args, **kw)
+ if 'MIME-Version' not in self:
+ self['MIME-Version'] = '1.0'
diff --git a/dist/lib/email/mime/__init__.py b/dist/lib/email/mime/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/dist/lib/email/mime/__pycache__/__init__.cpython-38.opt-1.pyc b/dist/lib/email/mime/__pycache__/__init__.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..3a1eeaa
Binary files /dev/null and b/dist/lib/email/mime/__pycache__/__init__.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/mime/__pycache__/application.cpython-38.opt-1.pyc b/dist/lib/email/mime/__pycache__/application.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..a7f0cb2
Binary files /dev/null and b/dist/lib/email/mime/__pycache__/application.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/mime/__pycache__/audio.cpython-38.opt-1.pyc b/dist/lib/email/mime/__pycache__/audio.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..de154f6
Binary files /dev/null and b/dist/lib/email/mime/__pycache__/audio.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/mime/__pycache__/base.cpython-38.opt-1.pyc b/dist/lib/email/mime/__pycache__/base.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..530aefe
Binary files /dev/null and b/dist/lib/email/mime/__pycache__/base.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/mime/__pycache__/image.cpython-38.opt-1.pyc b/dist/lib/email/mime/__pycache__/image.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..8d52627
Binary files /dev/null and b/dist/lib/email/mime/__pycache__/image.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/mime/__pycache__/message.cpython-38.opt-1.pyc b/dist/lib/email/mime/__pycache__/message.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..2fbf978
Binary files /dev/null and b/dist/lib/email/mime/__pycache__/message.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/mime/__pycache__/multipart.cpython-38.opt-1.pyc b/dist/lib/email/mime/__pycache__/multipart.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..ff8e068
Binary files /dev/null and b/dist/lib/email/mime/__pycache__/multipart.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/mime/__pycache__/nonmultipart.cpython-38.opt-1.pyc b/dist/lib/email/mime/__pycache__/nonmultipart.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..91460cc
Binary files /dev/null and b/dist/lib/email/mime/__pycache__/nonmultipart.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/mime/__pycache__/text.cpython-38.opt-1.pyc b/dist/lib/email/mime/__pycache__/text.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..46850ea
Binary files /dev/null and b/dist/lib/email/mime/__pycache__/text.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/email/mime/application.py b/dist/lib/email/mime/application.py
new file mode 100644
index 0000000..6877e55
--- /dev/null
+++ b/dist/lib/email/mime/application.py
@@ -0,0 +1,37 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Keith Dart
+# Contact: email-sig@python.org
+
+"""Class representing application/* type MIME documents."""
+
+__all__ = ["MIMEApplication"]
+
+from email import encoders
+from email.mime.nonmultipart import MIMENonMultipart
+
+
+class MIMEApplication(MIMENonMultipart):
+ """Class for generating application/* MIME documents."""
+
+ def __init__(self, _data, _subtype='octet-stream',
+ _encoder=encoders.encode_base64, *, policy=None, **_params):
+ """Create an application/* type MIME document.
+
+ _data is a string containing the raw application data.
+
+ _subtype is the MIME content type subtype, defaulting to
+ 'octet-stream'.
+
+ _encoder is a function which will perform the actual encoding for
+ transport of the application data, defaulting to base64 encoding.
+
+ Any additional keyword arguments are passed to the base class
+ constructor, which turns them into parameters on the Content-Type
+ header.
+ """
+ if _subtype is None:
+ raise TypeError('Invalid application MIME subtype')
+ MIMENonMultipart.__init__(self, 'application', _subtype, policy=policy,
+ **_params)
+ self.set_payload(_data)
+ _encoder(self)
diff --git a/dist/lib/email/mime/audio.py b/dist/lib/email/mime/audio.py
new file mode 100644
index 0000000..4bcd7b2
--- /dev/null
+++ b/dist/lib/email/mime/audio.py
@@ -0,0 +1,74 @@
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Anthony Baxter
+# Contact: email-sig@python.org
+
+"""Class representing audio/* type MIME documents."""
+
+__all__ = ['MIMEAudio']
+
+import sndhdr
+
+from io import BytesIO
+from email import encoders
+from email.mime.nonmultipart import MIMENonMultipart
+
+
+
+_sndhdr_MIMEmap = {'au' : 'basic',
+ 'wav' :'x-wav',
+ 'aiff':'x-aiff',
+ 'aifc':'x-aiff',
+ }
+
+# There are others in sndhdr that don't have MIME types. :(
+# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma??
+def _whatsnd(data):
+ """Try to identify a sound file type.
+
+ sndhdr.what() has a pretty cruddy interface, unfortunately. This is why
+ we re-do it here. It would be easier to reverse engineer the Unix 'file'
+ command and use the standard 'magic' file, as shipped with a modern Unix.
+ """
+ hdr = data[:512]
+ fakefile = BytesIO(hdr)
+ for testfn in sndhdr.tests:
+ res = testfn(hdr, fakefile)
+ if res is not None:
+ return _sndhdr_MIMEmap.get(res[0])
+ return None
+
+
+
+class MIMEAudio(MIMENonMultipart):
+ """Class for generating audio/* MIME documents."""
+
+ def __init__(self, _audiodata, _subtype=None,
+ _encoder=encoders.encode_base64, *, policy=None, **_params):
+ """Create an audio/* type MIME document.
+
+ _audiodata is a string containing the raw audio data. If this data
+ can be decoded by the standard Python `sndhdr' module, then the
+ subtype will be automatically included in the Content-Type header.
+ Otherwise, you can specify the specific audio subtype via the
+ _subtype parameter. If _subtype is not given, and no subtype can be
+ guessed, a TypeError is raised.
+
+ _encoder is a function which will perform the actual encoding for
+ transport of the image data. It takes one argument, which is this
+ Image instance. It should use get_payload() and set_payload() to
+ change the payload to the encoded form. It should also add any
+ Content-Transfer-Encoding or other headers to the message as
+ necessary. The default encoding is Base64.
+
+ Any additional keyword arguments are passed to the base class
+ constructor, which turns them into parameters on the Content-Type
+ header.
+ """
+ if _subtype is None:
+ _subtype = _whatsnd(_audiodata)
+ if _subtype is None:
+ raise TypeError('Could not find audio MIME subtype')
+ MIMENonMultipart.__init__(self, 'audio', _subtype, policy=policy,
+ **_params)
+ self.set_payload(_audiodata)
+ _encoder(self)
diff --git a/dist/lib/email/mime/base.py b/dist/lib/email/mime/base.py
new file mode 100644
index 0000000..1a3f9b5
--- /dev/null
+++ b/dist/lib/email/mime/base.py
@@ -0,0 +1,30 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Base class for MIME specializations."""
+
+__all__ = ['MIMEBase']
+
+import email.policy
+
+from email import message
+
+
+
+class MIMEBase(message.Message):
+ """Base class for MIME specializations."""
+
+ def __init__(self, _maintype, _subtype, *, policy=None, **_params):
+ """This constructor adds a Content-Type: and a MIME-Version: header.
+
+ The Content-Type: header is taken from the _maintype and _subtype
+ arguments. Additional parameters for this header are taken from the
+ keyword arguments.
+ """
+ if policy is None:
+ policy = email.policy.compat32
+ message.Message.__init__(self, policy=policy)
+ ctype = '%s/%s' % (_maintype, _subtype)
+ self.add_header('Content-Type', ctype, **_params)
+ self['MIME-Version'] = '1.0'
diff --git a/dist/lib/email/mime/image.py b/dist/lib/email/mime/image.py
new file mode 100644
index 0000000..9272464
--- /dev/null
+++ b/dist/lib/email/mime/image.py
@@ -0,0 +1,47 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Class representing image/* type MIME documents."""
+
+__all__ = ['MIMEImage']
+
+import imghdr
+
+from email import encoders
+from email.mime.nonmultipart import MIMENonMultipart
+
+
+
+class MIMEImage(MIMENonMultipart):
+ """Class for generating image/* type MIME documents."""
+
+ def __init__(self, _imagedata, _subtype=None,
+ _encoder=encoders.encode_base64, *, policy=None, **_params):
+ """Create an image/* type MIME document.
+
+ _imagedata is a string containing the raw image data. If this data
+ can be decoded by the standard Python `imghdr' module, then the
+ subtype will be automatically included in the Content-Type header.
+ Otherwise, you can specify the specific image subtype via the _subtype
+ parameter.
+
+ _encoder is a function which will perform the actual encoding for
+ transport of the image data. It takes one argument, which is this
+ Image instance. It should use get_payload() and set_payload() to
+ change the payload to the encoded form. It should also add any
+ Content-Transfer-Encoding or other headers to the message as
+ necessary. The default encoding is Base64.
+
+ Any additional keyword arguments are passed to the base class
+ constructor, which turns them into parameters on the Content-Type
+ header.
+ """
+ if _subtype is None:
+ _subtype = imghdr.what(None, _imagedata)
+ if _subtype is None:
+ raise TypeError('Could not guess image MIME subtype')
+ MIMENonMultipart.__init__(self, 'image', _subtype, policy=policy,
+ **_params)
+ self.set_payload(_imagedata)
+ _encoder(self)
diff --git a/dist/lib/email/mime/message.py b/dist/lib/email/mime/message.py
new file mode 100644
index 0000000..07e4f2d
--- /dev/null
+++ b/dist/lib/email/mime/message.py
@@ -0,0 +1,34 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Class representing message/* MIME documents."""
+
+__all__ = ['MIMEMessage']
+
+from email import message
+from email.mime.nonmultipart import MIMENonMultipart
+
+
+
+class MIMEMessage(MIMENonMultipart):
+ """Class representing message/* MIME documents."""
+
+ def __init__(self, _msg, _subtype='rfc822', *, policy=None):
+ """Create a message/* type MIME document.
+
+ _msg is a message object and must be an instance of Message, or a
+ derived class of Message, otherwise a TypeError is raised.
+
+ Optional _subtype defines the subtype of the contained message. The
+ default is "rfc822" (this is defined by the MIME standard, even though
+ the term "rfc822" is technically outdated by RFC 2822).
+ """
+ MIMENonMultipart.__init__(self, 'message', _subtype, policy=policy)
+ if not isinstance(_msg, message.Message):
+ raise TypeError('Argument is not an instance of Message')
+ # It's convenient to use this base class method. We need to do it
+ # this way or we'll get an exception
+ message.Message.attach(self, _msg)
+ # And be sure our default type is set correctly
+ self.set_default_type('message/rfc822')
diff --git a/dist/lib/email/mime/multipart.py b/dist/lib/email/mime/multipart.py
new file mode 100644
index 0000000..2d3f288
--- /dev/null
+++ b/dist/lib/email/mime/multipart.py
@@ -0,0 +1,48 @@
+# Copyright (C) 2002-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Base class for MIME multipart/* type messages."""
+
+__all__ = ['MIMEMultipart']
+
+from email.mime.base import MIMEBase
+
+
+
+class MIMEMultipart(MIMEBase):
+ """Base class for MIME multipart/* type messages."""
+
+ def __init__(self, _subtype='mixed', boundary=None, _subparts=None,
+ *, policy=None,
+ **_params):
+ """Creates a multipart/* type message.
+
+ By default, creates a multipart/mixed message, with proper
+ Content-Type and MIME-Version headers.
+
+ _subtype is the subtype of the multipart content type, defaulting to
+ `mixed'.
+
+ boundary is the multipart boundary string. By default it is
+ calculated as needed.
+
+ _subparts is a sequence of initial subparts for the payload. It
+ must be an iterable object, such as a list. You can always
+ attach new subparts to the message by using the attach() method.
+
+ Additional parameters for the Content-Type header are taken from the
+ keyword arguments (or passed into the _params argument).
+ """
+ MIMEBase.__init__(self, 'multipart', _subtype, policy=policy, **_params)
+
+ # Initialise _payload to an empty list as the Message superclass's
+ # implementation of is_multipart assumes that _payload is a list for
+ # multipart messages.
+ self._payload = []
+
+ if _subparts:
+ for p in _subparts:
+ self.attach(p)
+ if boundary:
+ self.set_boundary(boundary)
diff --git a/dist/lib/email/mime/nonmultipart.py b/dist/lib/email/mime/nonmultipart.py
new file mode 100644
index 0000000..e1f5196
--- /dev/null
+++ b/dist/lib/email/mime/nonmultipart.py
@@ -0,0 +1,22 @@
+# Copyright (C) 2002-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Base class for MIME type messages that are not multipart."""
+
+__all__ = ['MIMENonMultipart']
+
+from email import errors
+from email.mime.base import MIMEBase
+
+
+
+class MIMENonMultipart(MIMEBase):
+ """Base class for MIME non-multipart type messages."""
+
+ def attach(self, payload):
+ # The public API prohibits attaching multiple subparts to MIMEBase
+ # derived subtypes since none of them are, by definition, of content
+ # type multipart/*
+ raise errors.MultipartConversionError(
+ 'Cannot attach additional subparts to non-multipart/*')
diff --git a/dist/lib/email/mime/text.py b/dist/lib/email/mime/text.py
new file mode 100644
index 0000000..35b4423
--- /dev/null
+++ b/dist/lib/email/mime/text.py
@@ -0,0 +1,42 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Class representing text/* type MIME documents."""
+
+__all__ = ['MIMEText']
+
+from email.charset import Charset
+from email.mime.nonmultipart import MIMENonMultipart
+
+
+
+class MIMEText(MIMENonMultipart):
+ """Class for generating text/* type MIME documents."""
+
+ def __init__(self, _text, _subtype='plain', _charset=None, *, policy=None):
+ """Create a text/* type MIME document.
+
+ _text is the string for this message object.
+
+ _subtype is the MIME sub content type, defaulting to "plain".
+
+ _charset is the character set parameter added to the Content-Type
+ header. This defaults to "us-ascii". Note that as a side-effect, the
+ Content-Transfer-Encoding header will also be set.
+ """
+
+ # If no _charset was specified, check to see if there are non-ascii
+ # characters present. If not, use 'us-ascii', otherwise use utf-8.
+ # XXX: This can be removed once #7304 is fixed.
+ if _charset is None:
+ try:
+ _text.encode('us-ascii')
+ _charset = 'us-ascii'
+ except UnicodeEncodeError:
+ _charset = 'utf-8'
+
+ MIMENonMultipart.__init__(self, 'text', _subtype, policy=policy,
+ **{'charset': str(_charset)})
+
+ self.set_payload(_text, _charset)
diff --git a/dist/lib/email/parser.py b/dist/lib/email/parser.py
new file mode 100644
index 0000000..7db4da1
--- /dev/null
+++ b/dist/lib/email/parser.py
@@ -0,0 +1,131 @@
+# Copyright (C) 2001-2007 Python Software Foundation
+# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
+# Contact: email-sig@python.org
+
+"""A parser of RFC 2822 and MIME email messages."""
+
+__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
+ 'FeedParser', 'BytesFeedParser']
+
+from io import StringIO, TextIOWrapper
+
+from email.feedparser import FeedParser, BytesFeedParser
+from email._policybase import compat32
+
+
+class Parser:
+ def __init__(self, _class=None, *, policy=compat32):
+ """Parser of RFC 2822 and MIME email messages.
+
+ Creates an in-memory object tree representing the email message, which
+ can then be manipulated and turned over to a Generator to return the
+ textual representation of the message.
+
+ The string must be formatted as a block of RFC 2822 headers and header
+ continuation lines, optionally preceded by a `Unix-from' header. The
+ header block is terminated either by the end of the string or by a
+ blank line.
+
+ _class is the class to instantiate for new message objects when they
+ must be created. This class must have a constructor that can take
+ zero arguments. Default is Message.Message.
+
+ The policy keyword specifies a policy object that controls a number of
+ aspects of the parser's operation. The default policy maintains
+ backward compatibility.
+
+ """
+ self._class = _class
+ self.policy = policy
+
+ def parse(self, fp, headersonly=False):
+ """Create a message structure from the data in a file.
+
+ Reads all the data from the file and returns the root of the message
+ structure. Optional headersonly is a flag specifying whether to stop
+ parsing after reading the headers or not. The default is False,
+ meaning it parses the entire contents of the file.
+ """
+ feedparser = FeedParser(self._class, policy=self.policy)
+ if headersonly:
+ feedparser._set_headersonly()
+ while True:
+ data = fp.read(8192)
+ if not data:
+ break
+ feedparser.feed(data)
+ return feedparser.close()
+
+ def parsestr(self, text, headersonly=False):
+ """Create a message structure from a string.
+
+ Returns the root of the message structure. Optional headersonly is a
+ flag specifying whether to stop parsing after reading the headers or
+ not. The default is False, meaning it parses the entire contents of
+ the file.
+ """
+ return self.parse(StringIO(text), headersonly=headersonly)
+
+
+
+class HeaderParser(Parser):
+ def parse(self, fp, headersonly=True):
+ return Parser.parse(self, fp, True)
+
+ def parsestr(self, text, headersonly=True):
+ return Parser.parsestr(self, text, True)
+
+
+class BytesParser:
+
+ def __init__(self, *args, **kw):
+ """Parser of binary RFC 2822 and MIME email messages.
+
+ Creates an in-memory object tree representing the email message, which
+ can then be manipulated and turned over to a Generator to return the
+ textual representation of the message.
+
+ The input must be formatted as a block of RFC 2822 headers and header
+ continuation lines, optionally preceded by a `Unix-from' header. The
+ header block is terminated either by the end of the input or by a
+ blank line.
+
+ _class is the class to instantiate for new message objects when they
+ must be created. This class must have a constructor that can take
+ zero arguments. Default is Message.Message.
+ """
+ self.parser = Parser(*args, **kw)
+
+ def parse(self, fp, headersonly=False):
+ """Create a message structure from the data in a binary file.
+
+ Reads all the data from the file and returns the root of the message
+ structure. Optional headersonly is a flag specifying whether to stop
+ parsing after reading the headers or not. The default is False,
+ meaning it parses the entire contents of the file.
+ """
+ fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
+ try:
+ return self.parser.parse(fp, headersonly)
+ finally:
+ fp.detach()
+
+
+ def parsebytes(self, text, headersonly=False):
+ """Create a message structure from a byte string.
+
+ Returns the root of the message structure. Optional headersonly is a
+ flag specifying whether to stop parsing after reading the headers or
+ not. The default is False, meaning it parses the entire contents of
+ the file.
+ """
+ text = text.decode('ASCII', errors='surrogateescape')
+ return self.parser.parsestr(text, headersonly)
+
+
+class BytesHeaderParser(BytesParser):
+ def parse(self, fp, headersonly=True):
+ return BytesParser.parse(self, fp, headersonly=True)
+
+ def parsebytes(self, text, headersonly=True):
+ return BytesParser.parsebytes(self, text, headersonly=True)
diff --git a/dist/lib/email/policy.py b/dist/lib/email/policy.py
new file mode 100644
index 0000000..611deb5
--- /dev/null
+++ b/dist/lib/email/policy.py
@@ -0,0 +1,224 @@
+"""This will be the home for the policy that hooks in the new
+code that adds all the email6 features.
+"""
+
+import re
+import sys
+from email._policybase import Policy, Compat32, compat32, _extend_docstrings
+from email.utils import _has_surrogates
+from email.headerregistry import HeaderRegistry as HeaderRegistry
+from email.contentmanager import raw_data_manager
+from email.message import EmailMessage
+
+__all__ = [
+ 'Compat32',
+ 'compat32',
+ 'Policy',
+ 'EmailPolicy',
+ 'default',
+ 'strict',
+ 'SMTP',
+ 'HTTP',
+ ]
+
+linesep_splitter = re.compile(r'\n|\r')
+
+@_extend_docstrings
+class EmailPolicy(Policy):
+
+ """+
+ PROVISIONAL
+
+ The API extensions enabled by this policy are currently provisional.
+ Refer to the documentation for details.
+
+ This policy adds new header parsing and folding algorithms. Instead of
+ simple strings, headers are custom objects with custom attributes
+ depending on the type of the field. The folding algorithm fully
+ implements RFCs 2047 and 5322.
+
+ In addition to the settable attributes listed above that apply to
+ all Policies, this policy adds the following additional attributes:
+
+ utf8 -- if False (the default) message headers will be
+ serialized as ASCII, using encoded words to encode
+ any non-ASCII characters in the source strings. If
+ True, the message headers will be serialized using
+ utf8 and will not contain encoded words (see RFC
+ 6532 for more on this serialization format).
+
+ refold_source -- if the value for a header in the Message object
+ came from the parsing of some source, this attribute
+ indicates whether or not a generator should refold
+ that value when transforming the message back into
+ stream form. The possible values are:
+
+ none -- all source values use original folding
+ long -- source values that have any line that is
+ longer than max_line_length will be
+ refolded
+ all -- all values are refolded.
+
+ The default is 'long'.
+
+ header_factory -- a callable that takes two arguments, 'name' and
+ 'value', where 'name' is a header field name and
+ 'value' is an unfolded header field value, and
+ returns a string-like object that represents that
+ header. A default header_factory is provided that
+ understands some of the RFC5322 header field types.
+ (Currently address fields and date fields have
+ special treatment, while all other fields are
+ treated as unstructured. This list will be
+ completed before the extension is marked stable.)
+
+ content_manager -- an object with at least two methods: get_content
+ and set_content. When the get_content or
+ set_content method of a Message object is called,
+ it calls the corresponding method of this object,
+ passing it the message object as its first argument,
+ and any arguments or keywords that were passed to
+ it as additional arguments. The default
+ content_manager is
+ :data:`~email.contentmanager.raw_data_manager`.
+
+ """
+
+ message_factory = EmailMessage
+ utf8 = False
+ refold_source = 'long'
+ header_factory = HeaderRegistry()
+ content_manager = raw_data_manager
+
+ def __init__(self, **kw):
+ # Ensure that each new instance gets a unique header factory
+ # (as opposed to clones, which share the factory).
+ if 'header_factory' not in kw:
+ object.__setattr__(self, 'header_factory', HeaderRegistry())
+ super().__init__(**kw)
+
+ def header_max_count(self, name):
+ """+
+ The implementation for this class returns the max_count attribute from
+ the specialized header class that would be used to construct a header
+ of type 'name'.
+ """
+ return self.header_factory[name].max_count
+
+ # The logic of the next three methods is chosen such that it is possible to
+ # switch a Message object between a Compat32 policy and a policy derived
+ # from this class and have the results stay consistent. This allows a
+ # Message object constructed with this policy to be passed to a library
+ # that only handles Compat32 objects, or to receive such an object and
+ # convert it to use the newer style by just changing its policy. It is
+ # also chosen because it postpones the relatively expensive full rfc5322
+ # parse until as late as possible when parsing from source, since in many
+ # applications only a few headers will actually be inspected.
+
+ def header_source_parse(self, sourcelines):
+ """+
+ The name is parsed as everything up to the ':' and returned unmodified.
+ The value is determined by stripping leading whitespace off the
+ remainder of the first line, joining all subsequent lines together, and
+ stripping any trailing carriage return or linefeed characters. (This
+ is the same as Compat32).
+
+ """
+ name, value = sourcelines[0].split(':', 1)
+ value = value.lstrip(' \t') + ''.join(sourcelines[1:])
+ return (name, value.rstrip('\r\n'))
+
+ def header_store_parse(self, name, value):
+ """+
+ The name is returned unchanged. If the input value has a 'name'
+ attribute and it matches the name ignoring case, the value is returned
+ unchanged. Otherwise the name and value are passed to header_factory
+ method, and the resulting custom header object is returned as the
+ value. In this case a ValueError is raised if the input value contains
+ CR or LF characters.
+
+ """
+ if hasattr(value, 'name') and value.name.lower() == name.lower():
+ return (name, value)
+ if isinstance(value, str) and len(value.splitlines())>1:
+ # XXX this error message isn't quite right when we use splitlines
+ # (see issue 22233), but I'm not sure what should happen here.
+ raise ValueError("Header values may not contain linefeed "
+ "or carriage return characters")
+ return (name, self.header_factory(name, value))
+
+ def header_fetch_parse(self, name, value):
+ """+
+ If the value has a 'name' attribute, it is returned to unmodified.
+ Otherwise the name and the value with any linesep characters removed
+ are passed to the header_factory method, and the resulting custom
+ header object is returned. Any surrogateescaped bytes get turned
+ into the unicode unknown-character glyph.
+
+ """
+ if hasattr(value, 'name'):
+ return value
+ # We can't use splitlines here because it splits on more than \r and \n.
+ value = ''.join(linesep_splitter.split(value))
+ return self.header_factory(name, value)
+
+ def fold(self, name, value):
+ """+
+ Header folding is controlled by the refold_source policy setting. A
+ value is considered to be a 'source value' if and only if it does not
+ have a 'name' attribute (having a 'name' attribute means it is a header
+ object of some sort). If a source value needs to be refolded according
+ to the policy, it is converted into a custom header object by passing
+ the name and the value with any linesep characters removed to the
+ header_factory method. Folding of a custom header object is done by
+ calling its fold method with the current policy.
+
+ Source values are split into lines using splitlines. If the value is
+ not to be refolded, the lines are rejoined using the linesep from the
+ policy and returned. The exception is lines containing non-ascii
+ binary data. In that case the value is refolded regardless of the
+ refold_source setting, which causes the binary data to be CTE encoded
+ using the unknown-8bit charset.
+
+ """
+ return self._fold(name, value, refold_binary=True)
+
+ def fold_binary(self, name, value):
+ """+
+ The same as fold if cte_type is 7bit, except that the returned value is
+ bytes.
+
+ If cte_type is 8bit, non-ASCII binary data is converted back into
+ bytes. Headers with binary data are not refolded, regardless of the
+ refold_header setting, since there is no way to know whether the binary
+ data consists of single byte characters or multibyte characters.
+
+ If utf8 is true, headers are encoded to utf8, otherwise to ascii with
+ non-ASCII unicode rendered as encoded words.
+
+ """
+ folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
+ charset = 'utf8' if self.utf8 else 'ascii'
+ return folded.encode(charset, 'surrogateescape')
+
+ def _fold(self, name, value, refold_binary=False):
+ if hasattr(value, 'name'):
+ return value.fold(policy=self)
+ maxlen = self.max_line_length if self.max_line_length else sys.maxsize
+ lines = value.splitlines()
+ refold = (self.refold_source == 'all' or
+ self.refold_source == 'long' and
+ (lines and len(lines[0])+len(name)+2 > maxlen or
+ any(len(x) > maxlen for x in lines[1:])))
+ if refold or refold_binary and _has_surrogates(value):
+ return self.header_factory(name, ''.join(lines)).fold(policy=self)
+ return name + ': ' + self.linesep.join(lines) + self.linesep
+
+
+default = EmailPolicy()
+# Make the default policy use the class default header_factory
+del default.header_factory
+strict = default.clone(raise_on_defect=True)
+SMTP = default.clone(linesep='\r\n')
+HTTP = default.clone(linesep='\r\n', max_line_length=None)
+SMTPUTF8 = SMTP.clone(utf8=True)
diff --git a/dist/lib/email/quoprimime.py b/dist/lib/email/quoprimime.py
new file mode 100644
index 0000000..94534f7
--- /dev/null
+++ b/dist/lib/email/quoprimime.py
@@ -0,0 +1,299 @@
+# Copyright (C) 2001-2006 Python Software Foundation
+# Author: Ben Gertzfield
+# Contact: email-sig@python.org
+
+"""Quoted-printable content transfer encoding per RFCs 2045-2047.
+
+This module handles the content transfer encoding method defined in RFC 2045
+to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to
+safely encode text that is in a character set similar to the 7-bit US ASCII
+character set, but that includes some 8-bit characters that are normally not
+allowed in email bodies or headers.
+
+Quoted-printable is very space-inefficient for encoding binary files; use the
+email.base64mime module for that instead.
+
+This module provides an interface to encode and decode both headers and bodies
+with quoted-printable encoding.
+
+RFC 2045 defines a method for including character set information in an
+`encoded-word' in a header. This method is commonly used for 8-bit real names
+in To:/From:/Cc: etc. fields, as well as Subject: lines.
+
+This module does not do the line wrapping or end-of-line character
+conversion necessary for proper internationalized headers; it only
+does dumb encoding and decoding. To deal with the various line
+wrapping issues, use the email.header module.
+"""
+
+__all__ = [
+ 'body_decode',
+ 'body_encode',
+ 'body_length',
+ 'decode',
+ 'decodestring',
+ 'header_decode',
+ 'header_encode',
+ 'header_length',
+ 'quote',
+ 'unquote',
+ ]
+
+import re
+
+from string import ascii_letters, digits, hexdigits
+
+CRLF = '\r\n'
+NL = '\n'
+EMPTYSTRING = ''
+
+# Build a mapping of octets to the expansion of that octet. Since we're only
+# going to have 256 of these things, this isn't terribly inefficient
+# space-wise. Remember that headers and bodies have different sets of safe
+# characters. Initialize both maps with the full expansion, and then override
+# the safe bytes with the more compact form.
+_QUOPRI_MAP = ['=%02X' % c for c in range(256)]
+_QUOPRI_HEADER_MAP = _QUOPRI_MAP[:]
+_QUOPRI_BODY_MAP = _QUOPRI_MAP[:]
+
+# Safe header bytes which need no encoding.
+for c in b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii'):
+ _QUOPRI_HEADER_MAP[c] = chr(c)
+# Headers have one other special encoding; spaces become underscores.
+_QUOPRI_HEADER_MAP[ord(' ')] = '_'
+
+# Safe body bytes which need no encoding.
+for c in (b' !"#$%&\'()*+,-./0123456789:;<>'
+ b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`'
+ b'abcdefghijklmnopqrstuvwxyz{|}~\t'):
+ _QUOPRI_BODY_MAP[c] = chr(c)
+
+
+
+# Helpers
+def header_check(octet):
+ """Return True if the octet should be escaped with header quopri."""
+ return chr(octet) != _QUOPRI_HEADER_MAP[octet]
+
+
+def body_check(octet):
+ """Return True if the octet should be escaped with body quopri."""
+ return chr(octet) != _QUOPRI_BODY_MAP[octet]
+
+
+def header_length(bytearray):
+ """Return a header quoted-printable encoding length.
+
+ Note that this does not include any RFC 2047 chrome added by
+ `header_encode()`.
+
+ :param bytearray: An array of bytes (a.k.a. octets).
+ :return: The length in bytes of the byte array when it is encoded with
+ quoted-printable for headers.
+ """
+ return sum(len(_QUOPRI_HEADER_MAP[octet]) for octet in bytearray)
+
+
+def body_length(bytearray):
+ """Return a body quoted-printable encoding length.
+
+ :param bytearray: An array of bytes (a.k.a. octets).
+ :return: The length in bytes of the byte array when it is encoded with
+ quoted-printable for bodies.
+ """
+ return sum(len(_QUOPRI_BODY_MAP[octet]) for octet in bytearray)
+
+
+def _max_append(L, s, maxlen, extra=''):
+ if not isinstance(s, str):
+ s = chr(s)
+ if not L:
+ L.append(s.lstrip())
+ elif len(L[-1]) + len(s) <= maxlen:
+ L[-1] += extra + s
+ else:
+ L.append(s.lstrip())
+
+
+def unquote(s):
+ """Turn a string in the form =AB to the ASCII character with value 0xab"""
+ return chr(int(s[1:3], 16))
+
+
+def quote(c):
+ return _QUOPRI_MAP[ord(c)]
+
+
+def header_encode(header_bytes, charset='iso-8859-1'):
+ """Encode a single header line with quoted-printable (like) encoding.
+
+ Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but
+ used specifically for email header fields to allow charsets with mostly 7
+ bit characters (and some 8 bit) to remain more or less readable in non-RFC
+ 2045 aware mail clients.
+
+ charset names the character set to use in the RFC 2046 header. It
+ defaults to iso-8859-1.
+ """
+ # Return empty headers as an empty string.
+ if not header_bytes:
+ return ''
+ # Iterate over every byte, encoding if necessary.
+ encoded = header_bytes.decode('latin1').translate(_QUOPRI_HEADER_MAP)
+ # Now add the RFC chrome to each encoded chunk and glue the chunks
+ # together.
+ return '=?%s?q?%s?=' % (charset, encoded)
+
+
+_QUOPRI_BODY_ENCODE_MAP = _QUOPRI_BODY_MAP[:]
+for c in b'\r\n':
+ _QUOPRI_BODY_ENCODE_MAP[c] = chr(c)
+
+def body_encode(body, maxlinelen=76, eol=NL):
+ """Encode with quoted-printable, wrapping at maxlinelen characters.
+
+ Each line of encoded text will end with eol, which defaults to "\\n". Set
+ this to "\\r\\n" if you will be using the result of this function directly
+ in an email.
+
+ Each line will be wrapped at, at most, maxlinelen characters before the
+ eol string (maxlinelen defaults to 76 characters, the maximum value
+ permitted by RFC 2045). Long lines will have the 'soft line break'
+ quoted-printable character "=" appended to them, so the decoded text will
+ be identical to the original text.
+
+ The minimum maxlinelen is 4 to have room for a quoted character ("=XX")
+ followed by a soft line break. Smaller values will generate a
+ ValueError.
+
+ """
+
+ if maxlinelen < 4:
+ raise ValueError("maxlinelen must be at least 4")
+ if not body:
+ return body
+
+ # quote special characters
+ body = body.translate(_QUOPRI_BODY_ENCODE_MAP)
+
+ soft_break = '=' + eol
+ # leave space for the '=' at the end of a line
+ maxlinelen1 = maxlinelen - 1
+
+ encoded_body = []
+ append = encoded_body.append
+
+ for line in body.splitlines():
+ # break up the line into pieces no longer than maxlinelen - 1
+ start = 0
+ laststart = len(line) - 1 - maxlinelen
+ while start <= laststart:
+ stop = start + maxlinelen1
+ # make sure we don't break up an escape sequence
+ if line[stop - 2] == '=':
+ append(line[start:stop - 1])
+ start = stop - 2
+ elif line[stop - 1] == '=':
+ append(line[start:stop])
+ start = stop - 1
+ else:
+ append(line[start:stop] + '=')
+ start = stop
+
+ # handle rest of line, special case if line ends in whitespace
+ if line and line[-1] in ' \t':
+ room = start - laststart
+ if room >= 3:
+ # It's a whitespace character at end-of-line, and we have room
+ # for the three-character quoted encoding.
+ q = quote(line[-1])
+ elif room == 2:
+ # There's room for the whitespace character and a soft break.
+ q = line[-1] + soft_break
+ else:
+ # There's room only for a soft break. The quoted whitespace
+ # will be the only content on the subsequent line.
+ q = soft_break + quote(line[-1])
+ append(line[start:-1] + q)
+ else:
+ append(line[start:])
+
+ # add back final newline if present
+ if body[-1] in CRLF:
+ append('')
+
+ return eol.join(encoded_body)
+
+
+
+# BAW: I'm not sure if the intent was for the signature of this function to be
+# the same as base64MIME.decode() or not...
+def decode(encoded, eol=NL):
+ """Decode a quoted-printable string.
+
+ Lines are separated with eol, which defaults to \\n.
+ """
+ if not encoded:
+ return encoded
+ # BAW: see comment in encode() above. Again, we're building up the
+ # decoded string with string concatenation, which could be done much more
+ # efficiently.
+ decoded = ''
+
+ for line in encoded.splitlines():
+ line = line.rstrip()
+ if not line:
+ decoded += eol
+ continue
+
+ i = 0
+ n = len(line)
+ while i < n:
+ c = line[i]
+ if c != '=':
+ decoded += c
+ i += 1
+ # Otherwise, c == "=". Are we at the end of the line? If so, add
+ # a soft line break.
+ elif i+1 == n:
+ i += 1
+ continue
+ # Decode if in form =AB
+ elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits:
+ decoded += unquote(line[i:i+3])
+ i += 3
+ # Otherwise, not in form =AB, pass literally
+ else:
+ decoded += c
+ i += 1
+
+ if i == n:
+ decoded += eol
+ # Special case if original string did not end with eol
+ if encoded[-1] not in '\r\n' and decoded.endswith(eol):
+ decoded = decoded[:-1]
+ return decoded
+
+
+# For convenience and backwards compatibility w/ standard base64 module
+body_decode = decode
+decodestring = decode
+
+
+
+def _unquote_match(match):
+ """Turn a match in the form =AB to the ASCII character with value 0xab"""
+ s = match.group(0)
+ return unquote(s)
+
+
+# Header decoding is done a bit differently
+def header_decode(s):
+ """Decode a string encoded with RFC 2045 MIME header `Q' encoding.
+
+ This function does not parse a full MIME header value encoded with
+ quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use
+ the high level email.header class for that functionality.
+ """
+ s = s.replace('_', ' ')
+ return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s, flags=re.ASCII)
diff --git a/dist/lib/email/utils.py b/dist/lib/email/utils.py
new file mode 100644
index 0000000..07dd029
--- /dev/null
+++ b/dist/lib/email/utils.py
@@ -0,0 +1,376 @@
+# Copyright (C) 2001-2010 Python Software Foundation
+# Author: Barry Warsaw
+# Contact: email-sig@python.org
+
+"""Miscellaneous utilities."""
+
+__all__ = [
+ 'collapse_rfc2231_value',
+ 'decode_params',
+ 'decode_rfc2231',
+ 'encode_rfc2231',
+ 'formataddr',
+ 'formatdate',
+ 'format_datetime',
+ 'getaddresses',
+ 'make_msgid',
+ 'mktime_tz',
+ 'parseaddr',
+ 'parsedate',
+ 'parsedate_tz',
+ 'parsedate_to_datetime',
+ 'unquote',
+ ]
+
+import os
+import re
+import time
+import random
+import socket
+import datetime
+import urllib.parse
+
+from email._parseaddr import quote
+from email._parseaddr import AddressList as _AddressList
+from email._parseaddr import mktime_tz
+
+from email._parseaddr import parsedate, parsedate_tz, _parsedate_tz
+
+# Intrapackage imports
+from email.charset import Charset
+
+COMMASPACE = ', '
+EMPTYSTRING = ''
+UEMPTYSTRING = ''
+CRLF = '\r\n'
+TICK = "'"
+
+specialsre = re.compile(r'[][\\()<>@,:;".]')
+escapesre = re.compile(r'[\\"]')
+
+def _has_surrogates(s):
+ """Return True if s contains surrogate-escaped binary data."""
+ # This check is based on the fact that unless there are surrogates, utf8
+ # (Python's default encoding) can encode any string. This is the fastest
+ # way to check for surrogates, see issue 11454 for timings.
+ try:
+ s.encode()
+ return False
+ except UnicodeEncodeError:
+ return True
+
+# How to deal with a string containing bytes before handing it to the
+# application through the 'normal' interface.
+def _sanitize(string):
+ # Turn any escaped bytes into unicode 'unknown' char. If the escaped
+ # bytes happen to be utf-8 they will instead get decoded, even if they
+ # were invalid in the charset the source was supposed to be in. This
+ # seems like it is not a bad thing; a defect was still registered.
+ original_bytes = string.encode('utf-8', 'surrogateescape')
+ return original_bytes.decode('utf-8', 'replace')
+
+
+
+# Helpers
+
+def formataddr(pair, charset='utf-8'):
+ """The inverse of parseaddr(), this takes a 2-tuple of the form
+ (realname, email_address) and returns the string value suitable
+ for an RFC 2822 From, To or Cc header.
+
+ If the first element of pair is false, then the second element is
+ returned unmodified.
+
+ The optional charset is the character set that is used to encode
+ realname in case realname is not ASCII safe. Can be an instance of str or
+ a Charset-like object which has a header_encode method. Default is
+ 'utf-8'.
+ """
+ name, address = pair
+ # The address MUST (per RFC) be ascii, so raise a UnicodeError if it isn't.
+ address.encode('ascii')
+ if name:
+ try:
+ name.encode('ascii')
+ except UnicodeEncodeError:
+ if isinstance(charset, str):
+ charset = Charset(charset)
+ encoded_name = charset.header_encode(name)
+ return "%s <%s>" % (encoded_name, address)
+ else:
+ quotes = ''
+ if specialsre.search(name):
+ quotes = '"'
+ name = escapesre.sub(r'\\\g<0>', name)
+ return '%s%s%s <%s>' % (quotes, name, quotes, address)
+ return address
+
+
+
+def getaddresses(fieldvalues):
+ """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
+ all = COMMASPACE.join(fieldvalues)
+ a = _AddressList(all)
+ return a.addresslist
+
+
+def _format_timetuple_and_zone(timetuple, zone):
+ return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
+ ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]],
+ timetuple[2],
+ ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+ 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][timetuple[1] - 1],
+ timetuple[0], timetuple[3], timetuple[4], timetuple[5],
+ zone)
+
+def formatdate(timeval=None, localtime=False, usegmt=False):
+ """Returns a date string as specified by RFC 2822, e.g.:
+
+ Fri, 09 Nov 2001 01:08:47 -0000
+
+ Optional timeval if given is a floating point time value as accepted by
+ gmtime() and localtime(), otherwise the current time is used.
+
+ Optional localtime is a flag that when True, interprets timeval, and
+ returns a date relative to the local timezone instead of UTC, properly
+ taking daylight savings time into account.
+
+ Optional argument usegmt means that the timezone is written out as
+ an ascii string, not numeric one (so "GMT" instead of "+0000"). This
+ is needed for HTTP, and is only used when localtime==False.
+ """
+ # Note: we cannot use strftime() because that honors the locale and RFC
+ # 2822 requires that day and month names be the English abbreviations.
+ if timeval is None:
+ timeval = time.time()
+ if localtime or usegmt:
+ dt = datetime.datetime.fromtimestamp(timeval, datetime.timezone.utc)
+ else:
+ dt = datetime.datetime.utcfromtimestamp(timeval)
+ if localtime:
+ dt = dt.astimezone()
+ usegmt = False
+ return format_datetime(dt, usegmt)
+
+def format_datetime(dt, usegmt=False):
+ """Turn a datetime into a date string as specified in RFC 2822.
+
+ If usegmt is True, dt must be an aware datetime with an offset of zero. In
+ this case 'GMT' will be rendered instead of the normal +0000 required by
+ RFC2822. This is to support HTTP headers involving date stamps.
+ """
+ now = dt.timetuple()
+ if usegmt:
+ if dt.tzinfo is None or dt.tzinfo != datetime.timezone.utc:
+ raise ValueError("usegmt option requires a UTC datetime")
+ zone = 'GMT'
+ elif dt.tzinfo is None:
+ zone = '-0000'
+ else:
+ zone = dt.strftime("%z")
+ return _format_timetuple_and_zone(now, zone)
+
+
+def make_msgid(idstring=None, domain=None):
+ """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
+
+ <142480216486.20800.16526388040877946887@nightshade.la.mastaler.com>
+
+ Optional idstring if given is a string used to strengthen the
+ uniqueness of the message id. Optional domain if given provides the
+ portion of the message id after the '@'. It defaults to the locally
+ defined hostname.
+ """
+ timeval = int(time.time()*100)
+ pid = os.getpid()
+ randint = random.getrandbits(64)
+ if idstring is None:
+ idstring = ''
+ else:
+ idstring = '.' + idstring
+ if domain is None:
+ domain = socket.getfqdn()
+ msgid = '<%d.%d.%d%s@%s>' % (timeval, pid, randint, idstring, domain)
+ return msgid
+
+
+def parsedate_to_datetime(data):
+ *dtuple, tz = _parsedate_tz(data)
+ if tz is None:
+ return datetime.datetime(*dtuple[:6])
+ return datetime.datetime(*dtuple[:6],
+ tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
+
+
+def parseaddr(addr):
+ """
+ Parse addr into its constituent realname and email address parts.
+
+ Return a tuple of realname and email address, unless the parse fails, in
+ which case return a 2-tuple of ('', '').
+ """
+ addrs = _AddressList(addr).addresslist
+ if not addrs:
+ return '', ''
+ return addrs[0]
+
+
+# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
+def unquote(str):
+ """Remove quotes from a string."""
+ if len(str) > 1:
+ if str.startswith('"') and str.endswith('"'):
+ return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
+ if str.startswith('<') and str.endswith('>'):
+ return str[1:-1]
+ return str
+
+
+
+# RFC2231-related functions - parameter encoding and decoding
+def decode_rfc2231(s):
+ """Decode string according to RFC 2231"""
+ parts = s.split(TICK, 2)
+ if len(parts) <= 2:
+ return None, None, s
+ return parts
+
+
+def encode_rfc2231(s, charset=None, language=None):
+ """Encode string according to RFC 2231.
+
+ If neither charset nor language is given, then s is returned as-is. If
+ charset is given but not language, the string is encoded using the empty
+ string for language.
+ """
+ s = urllib.parse.quote(s, safe='', encoding=charset or 'ascii')
+ if charset is None and language is None:
+ return s
+ if language is None:
+ language = ''
+ return "%s'%s'%s" % (charset, language, s)
+
+
+rfc2231_continuation = re.compile(r'^(?P\w+)\*((?P[0-9]+)\*?)?$',
+ re.ASCII)
+
+def decode_params(params):
+ """Decode parameters list according to RFC 2231.
+
+ params is a sequence of 2-tuples containing (param name, string value).
+ """
+ # Copy params so we don't mess with the original
+ params = params[:]
+ new_params = []
+ # Map parameter's name to a list of continuations. The values are a
+ # 3-tuple of the continuation number, the string value, and a flag
+ # specifying whether a particular segment is %-encoded.
+ rfc2231_params = {}
+ name, value = params.pop(0)
+ new_params.append((name, value))
+ while params:
+ name, value = params.pop(0)
+ if name.endswith('*'):
+ encoded = True
+ else:
+ encoded = False
+ value = unquote(value)
+ mo = rfc2231_continuation.match(name)
+ if mo:
+ name, num = mo.group('name', 'num')
+ if num is not None:
+ num = int(num)
+ rfc2231_params.setdefault(name, []).append((num, value, encoded))
+ else:
+ new_params.append((name, '"%s"' % quote(value)))
+ if rfc2231_params:
+ for name, continuations in rfc2231_params.items():
+ value = []
+ extended = False
+ # Sort by number
+ continuations.sort()
+ # And now append all values in numerical order, converting
+ # %-encodings for the encoded segments. If any of the
+ # continuation names ends in a *, then the entire string, after
+ # decoding segments and concatenating, must have the charset and
+ # language specifiers at the beginning of the string.
+ for num, s, encoded in continuations:
+ if encoded:
+ # Decode as "latin-1", so the characters in s directly
+ # represent the percent-encoded octet values.
+ # collapse_rfc2231_value treats this as an octet sequence.
+ s = urllib.parse.unquote(s, encoding="latin-1")
+ extended = True
+ value.append(s)
+ value = quote(EMPTYSTRING.join(value))
+ if extended:
+ charset, language, value = decode_rfc2231(value)
+ new_params.append((name, (charset, language, '"%s"' % value)))
+ else:
+ new_params.append((name, '"%s"' % value))
+ return new_params
+
+def collapse_rfc2231_value(value, errors='replace',
+ fallback_charset='us-ascii'):
+ if not isinstance(value, tuple) or len(value) != 3:
+ return unquote(value)
+ # While value comes to us as a unicode string, we need it to be a bytes
+ # object. We do not want bytes() normal utf-8 decoder, we want a straight
+ # interpretation of the string as character bytes.
+ charset, language, text = value
+ if charset is None:
+ # Issue 17369: if charset/lang is None, decode_rfc2231 couldn't parse
+ # the value, so use the fallback_charset.
+ charset = fallback_charset
+ rawbytes = bytes(text, 'raw-unicode-escape')
+ try:
+ return str(rawbytes, charset, errors)
+ except LookupError:
+ # charset is not a known codec.
+ return unquote(text)
+
+
+#
+# datetime doesn't provide a localtime function yet, so provide one. Code
+# adapted from the patch in issue 9527. This may not be perfect, but it is
+# better than not having it.
+#
+
+def localtime(dt=None, isdst=-1):
+ """Return local time as an aware datetime object.
+
+ If called without arguments, return current time. Otherwise *dt*
+ argument should be a datetime instance, and it is converted to the
+ local time zone according to the system time zone database. If *dt* is
+ naive (that is, dt.tzinfo is None), it is assumed to be in local time.
+ In this case, a positive or zero value for *isdst* causes localtime to
+ presume initially that summer time (for example, Daylight Saving Time)
+ is or is not (respectively) in effect for the specified time. A
+ negative value for *isdst* causes the localtime() function to attempt
+ to divine whether summer time is in effect for the specified time.
+
+ """
+ if dt is None:
+ return datetime.datetime.now(datetime.timezone.utc).astimezone()
+ if dt.tzinfo is not None:
+ return dt.astimezone()
+ # We have a naive datetime. Convert to a (localtime) timetuple and pass to
+ # system mktime together with the isdst hint. System mktime will return
+ # seconds since epoch.
+ tm = dt.timetuple()[:-1] + (isdst,)
+ seconds = time.mktime(tm)
+ localtm = time.localtime(seconds)
+ try:
+ delta = datetime.timedelta(seconds=localtm.tm_gmtoff)
+ tz = datetime.timezone(delta, localtm.tm_zone)
+ except AttributeError:
+ # Compute UTC offset and compare with the value implied by tm_isdst.
+ # If the values match, use the zone name implied by tm_isdst.
+ delta = dt - datetime.datetime(*time.gmtime(seconds)[:6])
+ dst = time.daylight and localtm.tm_isdst > 0
+ gmtoff = -(time.altzone if dst else time.timezone)
+ if delta == datetime.timedelta(seconds=gmtoff):
+ tz = datetime.timezone(delta, time.tzname[dst])
+ else:
+ tz = datetime.timezone(delta)
+ return dt.replace(tzinfo=tz)
diff --git a/dist/lib/encodings/__init__.py b/dist/lib/encodings/__init__.py
new file mode 100644
index 0000000..ddd5afd
--- /dev/null
+++ b/dist/lib/encodings/__init__.py
@@ -0,0 +1,169 @@
+""" Standard "encodings" Package
+
+ Standard Python encoding modules are stored in this package
+ directory.
+
+ Codec modules must have names corresponding to normalized encoding
+ names as defined in the normalize_encoding() function below, e.g.
+ 'utf-8' must be implemented by the module 'utf_8.py'.
+
+ Each codec module must export the following interface:
+
+ * getregentry() -> codecs.CodecInfo object
+ The getregentry() API must return a CodecInfo object with encoder, decoder,
+ incrementalencoder, incrementaldecoder, streamwriter and streamreader
+ attributes which adhere to the Python Codec Interface Standard.
+
+ In addition, a module may optionally also define the following
+ APIs which are then used by the package's codec search function:
+
+ * getaliases() -> sequence of encoding name strings to use as aliases
+
+ Alias names returned by getaliases() must be normalized encoding
+ names as defined by normalize_encoding().
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+
+"""#"
+
+import codecs
+import sys
+from . import aliases
+
+_cache = {}
+_unknown = '--unknown--'
+_import_tail = ['*']
+_aliases = aliases.aliases
+
+class CodecRegistryError(LookupError, SystemError):
+ pass
+
+def normalize_encoding(encoding):
+
+ """ Normalize an encoding name.
+
+ Normalization works as follows: all non-alphanumeric
+ characters except the dot used for Python package names are
+ collapsed and replaced with a single underscore, e.g. ' -;#'
+ becomes '_'. Leading and trailing underscores are removed.
+
+ Note that encoding names should be ASCII only.
+
+ """
+ if isinstance(encoding, bytes):
+ encoding = str(encoding, "ascii")
+
+ chars = []
+ punct = False
+ for c in encoding:
+ if c.isalnum() or c == '.':
+ if punct and chars:
+ chars.append('_')
+ chars.append(c)
+ punct = False
+ else:
+ punct = True
+ return ''.join(chars)
+
+def search_function(encoding):
+
+ # Cache lookup
+ entry = _cache.get(encoding, _unknown)
+ if entry is not _unknown:
+ return entry
+
+ # Import the module:
+ #
+ # First try to find an alias for the normalized encoding
+ # name and lookup the module using the aliased name, then try to
+ # lookup the module using the standard import scheme, i.e. first
+ # try in the encodings package, then at top-level.
+ #
+ norm_encoding = normalize_encoding(encoding)
+ aliased_encoding = _aliases.get(norm_encoding) or \
+ _aliases.get(norm_encoding.replace('.', '_'))
+ if aliased_encoding is not None:
+ modnames = [aliased_encoding,
+ norm_encoding]
+ else:
+ modnames = [norm_encoding]
+ for modname in modnames:
+ if not modname or '.' in modname:
+ continue
+ try:
+ # Import is absolute to prevent the possibly malicious import of a
+ # module with side-effects that is not in the 'encodings' package.
+ mod = __import__('encodings.' + modname, fromlist=_import_tail,
+ level=0)
+ except ImportError:
+ # ImportError may occur because 'encodings.(modname)' does not exist,
+ # or because it imports a name that does not exist (see mbcs and oem)
+ pass
+ else:
+ break
+ else:
+ mod = None
+
+ try:
+ getregentry = mod.getregentry
+ except AttributeError:
+ # Not a codec module
+ mod = None
+
+ if mod is None:
+ # Cache misses
+ _cache[encoding] = None
+ return None
+
+ # Now ask the module for the registry entry
+ entry = getregentry()
+ if not isinstance(entry, codecs.CodecInfo):
+ if not 4 <= len(entry) <= 7:
+ raise CodecRegistryError('module "%s" (%s) failed to register'
+ % (mod.__name__, mod.__file__))
+ if not callable(entry[0]) or not callable(entry[1]) or \
+ (entry[2] is not None and not callable(entry[2])) or \
+ (entry[3] is not None and not callable(entry[3])) or \
+ (len(entry) > 4 and entry[4] is not None and not callable(entry[4])) or \
+ (len(entry) > 5 and entry[5] is not None and not callable(entry[5])):
+ raise CodecRegistryError('incompatible codecs in module "%s" (%s)'
+ % (mod.__name__, mod.__file__))
+ if len(entry)<7 or entry[6] is None:
+ entry += (None,)*(6-len(entry)) + (mod.__name__.split(".", 1)[1],)
+ entry = codecs.CodecInfo(*entry)
+
+ # Cache the codec registry entry
+ _cache[encoding] = entry
+
+ # Register its aliases (without overwriting previously registered
+ # aliases)
+ try:
+ codecaliases = mod.getaliases()
+ except AttributeError:
+ pass
+ else:
+ for alias in codecaliases:
+ if alias not in _aliases:
+ _aliases[alias] = modname
+
+ # Return the registry entry
+ return entry
+
+# Register the search_function in the Python codec registry
+codecs.register(search_function)
+
+if sys.platform == 'win32':
+ def _alias_mbcs(encoding):
+ try:
+ import _winapi
+ ansi_code_page = "cp%s" % _winapi.GetACP()
+ if encoding == ansi_code_page:
+ import encodings.mbcs
+ return encodings.mbcs.getregentry()
+ except ImportError:
+ # Imports may fail while we are shutting down
+ pass
+
+ codecs.register(_alias_mbcs)
diff --git a/dist/lib/encodings/__pycache__/__init__.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/__init__.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..fc6d6e3
Binary files /dev/null and b/dist/lib/encodings/__pycache__/__init__.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/aliases.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/aliases.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..cfbc119
Binary files /dev/null and b/dist/lib/encodings/__pycache__/aliases.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/ascii.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/ascii.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..7219bd4
Binary files /dev/null and b/dist/lib/encodings/__pycache__/ascii.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/base64_codec.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/base64_codec.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..24704e8
Binary files /dev/null and b/dist/lib/encodings/__pycache__/base64_codec.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/big5.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/big5.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..e7b11f7
Binary files /dev/null and b/dist/lib/encodings/__pycache__/big5.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/big5hkscs.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/big5hkscs.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..03bcc05
Binary files /dev/null and b/dist/lib/encodings/__pycache__/big5hkscs.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/bz2_codec.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/bz2_codec.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..e08fecf
Binary files /dev/null and b/dist/lib/encodings/__pycache__/bz2_codec.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/charmap.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/charmap.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..66a3261
Binary files /dev/null and b/dist/lib/encodings/__pycache__/charmap.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp037.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp037.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..3d57122
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp037.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp1006.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp1006.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..cb81633
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp1006.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp1026.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp1026.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..63d78f6
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp1026.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp1125.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp1125.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..2fe7454
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp1125.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp1140.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp1140.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..66e64a3
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp1140.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp1250.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp1250.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..444e67e
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp1250.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp1251.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp1251.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..8f3a20a
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp1251.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp1252.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp1252.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..298d28f
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp1252.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp1253.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp1253.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..9837e0a
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp1253.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp1254.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp1254.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..0efaab9
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp1254.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp1255.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp1255.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..d8f0852
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp1255.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp1256.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp1256.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..d4c8f5c
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp1256.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp1257.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp1257.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..a3ec8ae
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp1257.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp1258.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp1258.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..722974d
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp1258.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp273.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp273.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..19c04a1
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp273.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp424.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp424.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..20ada30
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp424.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp437.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp437.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..e780146
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp437.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp500.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp500.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..ea039d8
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp500.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp720.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp720.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..3a4268b
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp720.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp737.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp737.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..338ff5f
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp737.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp775.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp775.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..824b735
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp775.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp850.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp850.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..5113fc8
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp850.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp852.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp852.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..a66108f
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp852.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp855.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp855.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..05d2a9b
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp855.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp856.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp856.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..e561d3e
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp856.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp857.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp857.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..7061bee
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp857.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp858.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp858.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..d32a356
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp858.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp860.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp860.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..9896ac9
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp860.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp861.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp861.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..db8d3fc
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp861.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp862.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp862.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..06e169d
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp862.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp863.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp863.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..91d9cb1
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp863.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp864.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp864.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..d93fe77
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp864.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp865.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp865.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..fd96e5b
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp865.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp866.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp866.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..1ffb2da
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp866.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp869.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp869.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..b7acc24
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp869.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp874.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp874.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..fab2946
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp874.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp875.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp875.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..9ff9f74
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp875.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp932.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp932.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..9856e6f
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp932.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp949.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp949.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..884e390
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp949.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/cp950.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/cp950.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..bceeb50
Binary files /dev/null and b/dist/lib/encodings/__pycache__/cp950.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/euc_jis_2004.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/euc_jis_2004.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..fffe77f
Binary files /dev/null and b/dist/lib/encodings/__pycache__/euc_jis_2004.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/euc_jisx0213.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/euc_jisx0213.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..6cf9f92
Binary files /dev/null and b/dist/lib/encodings/__pycache__/euc_jisx0213.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/euc_jp.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/euc_jp.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..e56059b
Binary files /dev/null and b/dist/lib/encodings/__pycache__/euc_jp.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/euc_kr.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/euc_kr.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..06cdaf3
Binary files /dev/null and b/dist/lib/encodings/__pycache__/euc_kr.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/gb18030.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/gb18030.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..edbe9ec
Binary files /dev/null and b/dist/lib/encodings/__pycache__/gb18030.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/gb2312.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/gb2312.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..cfe2b9a
Binary files /dev/null and b/dist/lib/encodings/__pycache__/gb2312.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/gbk.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/gbk.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..5865185
Binary files /dev/null and b/dist/lib/encodings/__pycache__/gbk.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/hex_codec.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/hex_codec.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..6524c5c
Binary files /dev/null and b/dist/lib/encodings/__pycache__/hex_codec.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/hp_roman8.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/hp_roman8.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..37aaa80
Binary files /dev/null and b/dist/lib/encodings/__pycache__/hp_roman8.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/hz.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/hz.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..8e50d57
Binary files /dev/null and b/dist/lib/encodings/__pycache__/hz.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/idna.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/idna.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..8589f43
Binary files /dev/null and b/dist/lib/encodings/__pycache__/idna.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso2022_jp.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso2022_jp.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..2a8cd5f
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso2022_jp.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso2022_jp_1.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso2022_jp_1.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..d5bfae8
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso2022_jp_1.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso2022_jp_2.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso2022_jp_2.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..02a3525
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso2022_jp_2.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso2022_jp_2004.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso2022_jp_2004.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..867eff2
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso2022_jp_2004.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso2022_jp_3.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso2022_jp_3.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..6d3073f
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso2022_jp_3.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso2022_jp_ext.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso2022_jp_ext.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..cb78f35
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso2022_jp_ext.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso2022_kr.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso2022_kr.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..c506aa1
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso2022_kr.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso8859_1.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso8859_1.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..e8f64d7
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso8859_1.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso8859_10.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso8859_10.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..aa781ec
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso8859_10.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso8859_11.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso8859_11.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..20dce98
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso8859_11.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso8859_13.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso8859_13.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..0902f37
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso8859_13.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso8859_14.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso8859_14.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..4970173
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso8859_14.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso8859_15.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso8859_15.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..be2965d
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso8859_15.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso8859_16.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso8859_16.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..8e947b0
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso8859_16.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso8859_2.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso8859_2.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..3d2de08
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso8859_2.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso8859_3.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso8859_3.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..2f704f1
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso8859_3.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso8859_4.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso8859_4.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..1a39e67
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso8859_4.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso8859_5.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso8859_5.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..14f5575
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso8859_5.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso8859_6.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso8859_6.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..927169f
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso8859_6.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso8859_7.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso8859_7.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..1b4d94c
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso8859_7.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso8859_8.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso8859_8.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..e200175
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso8859_8.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/iso8859_9.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/iso8859_9.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..92d6ad4
Binary files /dev/null and b/dist/lib/encodings/__pycache__/iso8859_9.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/johab.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/johab.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..a593688
Binary files /dev/null and b/dist/lib/encodings/__pycache__/johab.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/koi8_r.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/koi8_r.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..c0c6201
Binary files /dev/null and b/dist/lib/encodings/__pycache__/koi8_r.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/koi8_t.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/koi8_t.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..2c93748
Binary files /dev/null and b/dist/lib/encodings/__pycache__/koi8_t.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/koi8_u.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/koi8_u.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..86dffd5
Binary files /dev/null and b/dist/lib/encodings/__pycache__/koi8_u.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/kz1048.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/kz1048.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..a46f85a
Binary files /dev/null and b/dist/lib/encodings/__pycache__/kz1048.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/latin_1.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/latin_1.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..9805842
Binary files /dev/null and b/dist/lib/encodings/__pycache__/latin_1.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/mac_arabic.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/mac_arabic.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..e652514
Binary files /dev/null and b/dist/lib/encodings/__pycache__/mac_arabic.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/mac_centeuro.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/mac_centeuro.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..9354f71
Binary files /dev/null and b/dist/lib/encodings/__pycache__/mac_centeuro.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/mac_croatian.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/mac_croatian.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..83e19d0
Binary files /dev/null and b/dist/lib/encodings/__pycache__/mac_croatian.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/mac_cyrillic.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/mac_cyrillic.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..26f798a
Binary files /dev/null and b/dist/lib/encodings/__pycache__/mac_cyrillic.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/mac_farsi.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/mac_farsi.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..0c03f89
Binary files /dev/null and b/dist/lib/encodings/__pycache__/mac_farsi.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/mac_greek.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/mac_greek.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..1be68df
Binary files /dev/null and b/dist/lib/encodings/__pycache__/mac_greek.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/mac_iceland.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/mac_iceland.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..2d7301e
Binary files /dev/null and b/dist/lib/encodings/__pycache__/mac_iceland.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/mac_latin2.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/mac_latin2.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..3ba4ef3
Binary files /dev/null and b/dist/lib/encodings/__pycache__/mac_latin2.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/mac_roman.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/mac_roman.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..1aabdae
Binary files /dev/null and b/dist/lib/encodings/__pycache__/mac_roman.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/mac_romanian.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/mac_romanian.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..f97693e
Binary files /dev/null and b/dist/lib/encodings/__pycache__/mac_romanian.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/mac_turkish.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/mac_turkish.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..767b40a
Binary files /dev/null and b/dist/lib/encodings/__pycache__/mac_turkish.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/mbcs.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/mbcs.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..86ddbc2
Binary files /dev/null and b/dist/lib/encodings/__pycache__/mbcs.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/oem.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/oem.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..ec00b4b
Binary files /dev/null and b/dist/lib/encodings/__pycache__/oem.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/palmos.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/palmos.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..4f7b776
Binary files /dev/null and b/dist/lib/encodings/__pycache__/palmos.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/ptcp154.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/ptcp154.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..481f82d
Binary files /dev/null and b/dist/lib/encodings/__pycache__/ptcp154.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/punycode.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/punycode.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..318c8ce
Binary files /dev/null and b/dist/lib/encodings/__pycache__/punycode.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/quopri_codec.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/quopri_codec.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..f18a2a6
Binary files /dev/null and b/dist/lib/encodings/__pycache__/quopri_codec.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/raw_unicode_escape.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/raw_unicode_escape.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..0d2adf6
Binary files /dev/null and b/dist/lib/encodings/__pycache__/raw_unicode_escape.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/rot_13.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/rot_13.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..5f10f52
Binary files /dev/null and b/dist/lib/encodings/__pycache__/rot_13.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/shift_jis.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/shift_jis.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..e8792a9
Binary files /dev/null and b/dist/lib/encodings/__pycache__/shift_jis.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/shift_jis_2004.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/shift_jis_2004.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..7fa42c7
Binary files /dev/null and b/dist/lib/encodings/__pycache__/shift_jis_2004.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/shift_jisx0213.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/shift_jisx0213.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..0ada706
Binary files /dev/null and b/dist/lib/encodings/__pycache__/shift_jisx0213.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/tis_620.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/tis_620.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..8dc680a
Binary files /dev/null and b/dist/lib/encodings/__pycache__/tis_620.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/undefined.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/undefined.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..e6ff989
Binary files /dev/null and b/dist/lib/encodings/__pycache__/undefined.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/unicode_escape.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/unicode_escape.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..a9cf25a
Binary files /dev/null and b/dist/lib/encodings/__pycache__/unicode_escape.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/utf_16.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/utf_16.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..c6551ad
Binary files /dev/null and b/dist/lib/encodings/__pycache__/utf_16.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/utf_16_be.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/utf_16_be.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..ccdc897
Binary files /dev/null and b/dist/lib/encodings/__pycache__/utf_16_be.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/utf_16_le.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/utf_16_le.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..dc37ca3
Binary files /dev/null and b/dist/lib/encodings/__pycache__/utf_16_le.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/utf_32.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/utf_32.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..c60f3e7
Binary files /dev/null and b/dist/lib/encodings/__pycache__/utf_32.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/utf_32_be.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/utf_32_be.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..9741458
Binary files /dev/null and b/dist/lib/encodings/__pycache__/utf_32_be.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/utf_32_le.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/utf_32_le.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..d60289b
Binary files /dev/null and b/dist/lib/encodings/__pycache__/utf_32_le.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/utf_7.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/utf_7.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..e0208bd
Binary files /dev/null and b/dist/lib/encodings/__pycache__/utf_7.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/utf_8.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/utf_8.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..3eabfd0
Binary files /dev/null and b/dist/lib/encodings/__pycache__/utf_8.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/utf_8_sig.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/utf_8_sig.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..0e218af
Binary files /dev/null and b/dist/lib/encodings/__pycache__/utf_8_sig.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/uu_codec.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/uu_codec.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..ccbd943
Binary files /dev/null and b/dist/lib/encodings/__pycache__/uu_codec.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/__pycache__/zlib_codec.cpython-38.opt-1.pyc b/dist/lib/encodings/__pycache__/zlib_codec.cpython-38.opt-1.pyc
new file mode 100644
index 0000000..c91dd76
Binary files /dev/null and b/dist/lib/encodings/__pycache__/zlib_codec.cpython-38.opt-1.pyc differ
diff --git a/dist/lib/encodings/aliases.py b/dist/lib/encodings/aliases.py
new file mode 100644
index 0000000..2444f9f
--- /dev/null
+++ b/dist/lib/encodings/aliases.py
@@ -0,0 +1,553 @@
+""" Encoding Aliases Support
+
+ This module is used by the encodings package search function to
+ map encodings names to module names.
+
+ Note that the search function normalizes the encoding names before
+ doing the lookup, so the mapping will have to map normalized
+ encoding names to module names.
+
+ Contents:
+
+ The following aliases dictionary contains mappings of all IANA
+ character set names for which the Python core library provides
+ codecs. In addition to these, a few Python specific codec
+ aliases have also been added.
+
+"""
+aliases = {
+
+ # Please keep this list sorted alphabetically by value !
+
+ # ascii codec
+ '646' : 'ascii',
+ 'ansi_x3.4_1968' : 'ascii',
+ 'ansi_x3_4_1968' : 'ascii', # some email headers use this non-standard name
+ 'ansi_x3.4_1986' : 'ascii',
+ 'cp367' : 'ascii',
+ 'csascii' : 'ascii',
+ 'ibm367' : 'ascii',
+ 'iso646_us' : 'ascii',
+ 'iso_646.irv_1991' : 'ascii',
+ 'iso_ir_6' : 'ascii',
+ 'us' : 'ascii',
+ 'us_ascii' : 'ascii',
+
+ # base64_codec codec
+ 'base64' : 'base64_codec',
+ 'base_64' : 'base64_codec',
+
+ # big5 codec
+ 'big5_tw' : 'big5',
+ 'csbig5' : 'big5',
+
+ # big5hkscs codec
+ 'big5_hkscs' : 'big5hkscs',
+ 'hkscs' : 'big5hkscs',
+
+ # bz2_codec codec
+ 'bz2' : 'bz2_codec',
+
+ # cp037 codec
+ '037' : 'cp037',
+ 'csibm037' : 'cp037',
+ 'ebcdic_cp_ca' : 'cp037',
+ 'ebcdic_cp_nl' : 'cp037',
+ 'ebcdic_cp_us' : 'cp037',
+ 'ebcdic_cp_wt' : 'cp037',
+ 'ibm037' : 'cp037',
+ 'ibm039' : 'cp037',
+
+ # cp1026 codec
+ '1026' : 'cp1026',
+ 'csibm1026' : 'cp1026',
+ 'ibm1026' : 'cp1026',
+
+ # cp1125 codec
+ '1125' : 'cp1125',
+ 'ibm1125' : 'cp1125',
+ 'cp866u' : 'cp1125',
+ 'ruscii' : 'cp1125',
+
+ # cp1140 codec
+ '1140' : 'cp1140',
+ 'ibm1140' : 'cp1140',
+
+ # cp1250 codec
+ '1250' : 'cp1250',
+ 'windows_1250' : 'cp1250',
+
+ # cp1251 codec
+ '1251' : 'cp1251',
+ 'windows_1251' : 'cp1251',
+
+ # cp1252 codec
+ '1252' : 'cp1252',
+ 'windows_1252' : 'cp1252',
+
+ # cp1253 codec
+ '1253' : 'cp1253',
+ 'windows_1253' : 'cp1253',
+
+ # cp1254 codec
+ '1254' : 'cp1254',
+ 'windows_1254' : 'cp1254',
+
+ # cp1255 codec
+ '1255' : 'cp1255',
+ 'windows_1255' : 'cp1255',
+
+ # cp1256 codec
+ '1256' : 'cp1256',
+ 'windows_1256' : 'cp1256',
+
+ # cp1257 codec
+ '1257' : 'cp1257',
+ 'windows_1257' : 'cp1257',
+
+ # cp1258 codec
+ '1258' : 'cp1258',
+ 'windows_1258' : 'cp1258',
+
+ # cp273 codec
+ '273' : 'cp273',
+ 'ibm273' : 'cp273',
+ 'csibm273' : 'cp273',
+
+ # cp424 codec
+ '424' : 'cp424',
+ 'csibm424' : 'cp424',
+ 'ebcdic_cp_he' : 'cp424',
+ 'ibm424' : 'cp424',
+
+ # cp437 codec
+ '437' : 'cp437',
+ 'cspc8codepage437' : 'cp437',
+ 'ibm437' : 'cp437',
+
+ # cp500 codec
+ '500' : 'cp500',
+ 'csibm500' : 'cp500',
+ 'ebcdic_cp_be' : 'cp500',
+ 'ebcdic_cp_ch' : 'cp500',
+ 'ibm500' : 'cp500',
+
+ # cp775 codec
+ '775' : 'cp775',
+ 'cspc775baltic' : 'cp775',
+ 'ibm775' : 'cp775',
+
+ # cp850 codec
+ '850' : 'cp850',
+ 'cspc850multilingual' : 'cp850',
+ 'ibm850' : 'cp850',
+
+ # cp852 codec
+ '852' : 'cp852',
+ 'cspcp852' : 'cp852',
+ 'ibm852' : 'cp852',
+
+ # cp855 codec
+ '855' : 'cp855',
+ 'csibm855' : 'cp855',
+ 'ibm855' : 'cp855',
+
+ # cp857 codec
+ '857' : 'cp857',
+ 'csibm857' : 'cp857',
+ 'ibm857' : 'cp857',
+
+ # cp858 codec
+ '858' : 'cp858',
+ 'csibm858' : 'cp858',
+ 'ibm858' : 'cp858',
+
+ # cp860 codec
+ '860' : 'cp860',
+ 'csibm860' : 'cp860',
+ 'ibm860' : 'cp860',
+
+ # cp861 codec
+ '861' : 'cp861',
+ 'cp_is' : 'cp861',
+ 'csibm861' : 'cp861',
+ 'ibm861' : 'cp861',
+
+ # cp862 codec
+ '862' : 'cp862',
+ 'cspc862latinhebrew' : 'cp862',
+ 'ibm862' : 'cp862',
+
+ # cp863 codec
+ '863' : 'cp863',
+ 'csibm863' : 'cp863',
+ 'ibm863' : 'cp863',
+
+ # cp864 codec
+ '864' : 'cp864',
+ 'csibm864' : 'cp864',
+ 'ibm864' : 'cp864',
+
+ # cp865 codec
+ '865' : 'cp865',
+ 'csibm865' : 'cp865',
+ 'ibm865' : 'cp865',
+
+ # cp866 codec
+ '866' : 'cp866',
+ 'csibm866' : 'cp866',
+ 'ibm866' : 'cp866',
+
+ # cp869 codec
+ '869' : 'cp869',
+ 'cp_gr' : 'cp869',
+ 'csibm869' : 'cp869',
+ 'ibm869' : 'cp869',
+
+ # cp932 codec
+ '932' : 'cp932',
+ 'ms932' : 'cp932',
+ 'mskanji' : 'cp932',
+ 'ms_kanji' : 'cp932',
+
+ # cp949 codec
+ '949' : 'cp949',
+ 'ms949' : 'cp949',
+ 'uhc' : 'cp949',
+
+ # cp950 codec
+ '950' : 'cp950',
+ 'ms950' : 'cp950',
+
+ # euc_jis_2004 codec
+ 'jisx0213' : 'euc_jis_2004',
+ 'eucjis2004' : 'euc_jis_2004',
+ 'euc_jis2004' : 'euc_jis_2004',
+
+ # euc_jisx0213 codec
+ 'eucjisx0213' : 'euc_jisx0213',
+
+ # euc_jp codec
+ 'eucjp' : 'euc_jp',
+ 'ujis' : 'euc_jp',
+ 'u_jis' : 'euc_jp',
+
+ # euc_kr codec
+ 'euckr' : 'euc_kr',
+ 'korean' : 'euc_kr',
+ 'ksc5601' : 'euc_kr',
+ 'ks_c_5601' : 'euc_kr',
+ 'ks_c_5601_1987' : 'euc_kr',
+ 'ksx1001' : 'euc_kr',
+ 'ks_x_1001' : 'euc_kr',
+
+ # gb18030 codec
+ 'gb18030_2000' : 'gb18030',
+
+ # gb2312 codec
+ 'chinese' : 'gb2312',
+ 'csiso58gb231280' : 'gb2312',
+ 'euc_cn' : 'gb2312',
+ 'euccn' : 'gb2312',
+ 'eucgb2312_cn' : 'gb2312',
+ 'gb2312_1980' : 'gb2312',
+ 'gb2312_80' : 'gb2312',
+ 'iso_ir_58' : 'gb2312',
+
+ # gbk codec
+ '936' : 'gbk',
+ 'cp936' : 'gbk',
+ 'ms936' : 'gbk',
+
+ # hex_codec codec
+ 'hex' : 'hex_codec',
+
+ # hp_roman8 codec
+ 'roman8' : 'hp_roman8',
+ 'r8' : 'hp_roman8',
+ 'csHPRoman8' : 'hp_roman8',
+ 'cp1051' : 'hp_roman8',
+ 'ibm1051' : 'hp_roman8',
+
+ # hz codec
+ 'hzgb' : 'hz',
+ 'hz_gb' : 'hz',
+ 'hz_gb_2312' : 'hz',
+
+ # iso2022_jp codec
+ 'csiso2022jp' : 'iso2022_jp',
+ 'iso2022jp' : 'iso2022_jp',
+ 'iso_2022_jp' : 'iso2022_jp',
+
+ # iso2022_jp_1 codec
+ 'iso2022jp_1' : 'iso2022_jp_1',
+ 'iso_2022_jp_1' : 'iso2022_jp_1',
+
+ # iso2022_jp_2 codec
+ 'iso2022jp_2' : 'iso2022_jp_2',
+ 'iso_2022_jp_2' : 'iso2022_jp_2',
+
+ # iso2022_jp_2004 codec
+ 'iso_2022_jp_2004' : 'iso2022_jp_2004',
+ 'iso2022jp_2004' : 'iso2022_jp_2004',
+
+ # iso2022_jp_3 codec
+ 'iso2022jp_3' : 'iso2022_jp_3',
+ 'iso_2022_jp_3' : 'iso2022_jp_3',
+
+ # iso2022_jp_ext codec
+ 'iso2022jp_ext' : 'iso2022_jp_ext',
+ 'iso_2022_jp_ext' : 'iso2022_jp_ext',
+
+ # iso2022_kr codec
+ 'csiso2022kr' : 'iso2022_kr',
+ 'iso2022kr' : 'iso2022_kr',
+ 'iso_2022_kr' : 'iso2022_kr',
+
+ # iso8859_10 codec
+ 'csisolatin6' : 'iso8859_10',
+ 'iso_8859_10' : 'iso8859_10',
+ 'iso_8859_10_1992' : 'iso8859_10',
+ 'iso_ir_157' : 'iso8859_10',
+ 'l6' : 'iso8859_10',
+ 'latin6' : 'iso8859_10',
+
+ # iso8859_11 codec
+ 'thai' : 'iso8859_11',
+ 'iso_8859_11' : 'iso8859_11',
+ 'iso_8859_11_2001' : 'iso8859_11',
+
+ # iso8859_13 codec
+ 'iso_8859_13' : 'iso8859_13',
+ 'l7' : 'iso8859_13',
+ 'latin7' : 'iso8859_13',
+
+ # iso8859_14 codec
+ 'iso_8859_14' : 'iso8859_14',
+ 'iso_8859_14_1998' : 'iso8859_14',
+ 'iso_celtic' : 'iso8859_14',
+ 'iso_ir_199' : 'iso8859_14',
+ 'l8' : 'iso8859_14',
+ 'latin8' : 'iso8859_14',
+
+ # iso8859_15 codec
+ 'iso_8859_15' : 'iso8859_15',
+ 'l9' : 'iso8859_15',
+ 'latin9' : 'iso8859_15',
+
+ # iso8859_16 codec
+ 'iso_8859_16' : 'iso8859_16',
+ 'iso_8859_16_2001' : 'iso8859_16',
+ 'iso_ir_226' : 'iso8859_16',
+ 'l10' : 'iso8859_16',
+ 'latin10' : 'iso8859_16',
+
+ # iso8859_2 codec
+ 'csisolatin2' : 'iso8859_2',
+ 'iso_8859_2' : 'iso8859_2',
+ 'iso_8859_2_1987' : 'iso8859_2',
+ 'iso_ir_101' : 'iso8859_2',
+ 'l2' : 'iso8859_2',
+ 'latin2' : 'iso8859_2',
+
+ # iso8859_3 codec
+ 'csisolatin3' : 'iso8859_3',
+ 'iso_8859_3' : 'iso8859_3',
+ 'iso_8859_3_1988' : 'iso8859_3',
+ 'iso_ir_109' : 'iso8859_3',
+ 'l3' : 'iso8859_3',
+ 'latin3' : 'iso8859_3',
+
+ # iso8859_4 codec
+ 'csisolatin4' : 'iso8859_4',
+ 'iso_8859_4' : 'iso8859_4',
+ 'iso_8859_4_1988' : 'iso8859_4',
+ 'iso_ir_110' : 'iso8859_4',
+ 'l4' : 'iso8859_4',
+ 'latin4' : 'iso8859_4',
+
+ # iso8859_5 codec
+ 'csisolatincyrillic' : 'iso8859_5',
+ 'cyrillic' : 'iso8859_5',
+ 'iso_8859_5' : 'iso8859_5',
+ 'iso_8859_5_1988' : 'iso8859_5',
+ 'iso_ir_144' : 'iso8859_5',
+
+ # iso8859_6 codec
+ 'arabic' : 'iso8859_6',
+ 'asmo_708' : 'iso8859_6',
+ 'csisolatinarabic' : 'iso8859_6',
+ 'ecma_114' : 'iso8859_6',
+ 'iso_8859_6' : 'iso8859_6',
+ 'iso_8859_6_1987' : 'iso8859_6',
+ 'iso_ir_127' : 'iso8859_6',
+
+ # iso8859_7 codec
+ 'csisolatingreek' : 'iso8859_7',
+ 'ecma_118' : 'iso8859_7',
+ 'elot_928' : 'iso8859_7',
+ 'greek' : 'iso8859_7',
+ 'greek8' : 'iso8859_7',
+ 'iso_8859_7' : 'iso8859_7',
+ 'iso_8859_7_1987' : 'iso8859_7',
+ 'iso_ir_126' : 'iso8859_7',
+
+ # iso8859_8 codec
+ 'csisolatinhebrew' : 'iso8859_8',
+ 'hebrew' : 'iso8859_8',
+ 'iso_8859_8' : 'iso8859_8',
+ 'iso_8859_8_1988' : 'iso8859_8',
+ 'iso_ir_138' : 'iso8859_8',
+
+ # iso8859_9 codec
+ 'csisolatin5' : 'iso8859_9',
+ 'iso_8859_9' : 'iso8859_9',
+ 'iso_8859_9_1989' : 'iso8859_9',
+ 'iso_ir_148' : 'iso8859_9',
+ 'l5' : 'iso8859_9',
+ 'latin5' : 'iso8859_9',
+
+ # johab codec
+ 'cp1361' : 'johab',
+ 'ms1361' : 'johab',
+
+ # koi8_r codec
+ 'cskoi8r' : 'koi8_r',
+
+ # kz1048 codec
+ 'kz_1048' : 'kz1048',
+ 'rk1048' : 'kz1048',
+ 'strk1048_2002' : 'kz1048',
+
+ # latin_1 codec
+ #
+ # Note that the latin_1 codec is implemented internally in C and a
+ # lot faster than the charmap codec iso8859_1 which uses the same
+ # encoding. This is why we discourage the use of the iso8859_1
+ # codec and alias it to latin_1 instead.
+ #
+ '8859' : 'latin_1',
+ 'cp819' : 'latin_1',
+ 'csisolatin1' : 'latin_1',
+ 'ibm819' : 'latin_1',
+ 'iso8859' : 'latin_1',
+ 'iso8859_1' : 'latin_1',
+ 'iso_8859_1' : 'latin_1',
+ 'iso_8859_1_1987' : 'latin_1',
+ 'iso_ir_100' : 'latin_1',
+ 'l1' : 'latin_1',
+ 'latin' : 'latin_1',
+ 'latin1' : 'latin_1',
+
+ # mac_cyrillic codec
+ 'maccyrillic' : 'mac_cyrillic',
+
+ # mac_greek codec
+ 'macgreek' : 'mac_greek',
+
+ # mac_iceland codec
+ 'maciceland' : 'mac_iceland',
+
+ # mac_latin2 codec
+ 'maccentraleurope' : 'mac_latin2',
+ 'maclatin2' : 'mac_latin2',
+
+ # mac_roman codec
+ 'macintosh' : 'mac_roman',
+ 'macroman' : 'mac_roman',
+
+ # mac_turkish codec
+ 'macturkish' : 'mac_turkish',
+
+ # mbcs codec
+ 'ansi' : 'mbcs',
+ 'dbcs' : 'mbcs',
+
+ # ptcp154 codec
+ 'csptcp154' : 'ptcp154',
+ 'pt154' : 'ptcp154',
+ 'cp154' : 'ptcp154',
+ 'cyrillic_asian' : 'ptcp154',
+
+ # quopri_codec codec
+ 'quopri' : 'quopri_codec',
+ 'quoted_printable' : 'quopri_codec',
+ 'quotedprintable' : 'quopri_codec',
+
+ # rot_13 codec
+ 'rot13' : 'rot_13',
+
+ # shift_jis codec
+ 'csshiftjis' : 'shift_jis',
+ 'shiftjis' : 'shift_jis',
+ 'sjis' : 'shift_jis',
+ 's_jis' : 'shift_jis',
+
+ # shift_jis_2004 codec
+ 'shiftjis2004' : 'shift_jis_2004',
+ 'sjis_2004' : 'shift_jis_2004',
+ 's_jis_2004' : 'shift_jis_2004',
+
+ # shift_jisx0213 codec
+ 'shiftjisx0213' : 'shift_jisx0213',
+ 'sjisx0213' : 'shift_jisx0213',
+ 's_jisx0213' : 'shift_jisx0213',
+
+ # tactis codec
+ 'tis260' : 'tactis',
+
+ # tis_620 codec
+ 'tis620' : 'tis_620',
+ 'tis_620_0' : 'tis_620',
+ 'tis_620_2529_0' : 'tis_620',
+ 'tis_620_2529_1' : 'tis_620',
+ 'iso_ir_166' : 'tis_620',
+
+ # utf_16 codec
+ 'u16' : 'utf_16',
+ 'utf16' : 'utf_16',
+
+ # utf_16_be codec
+ 'unicodebigunmarked' : 'utf_16_be',
+ 'utf_16be' : 'utf_16_be',
+
+ # utf_16_le codec
+ 'unicodelittleunmarked' : 'utf_16_le',
+ 'utf_16le' : 'utf_16_le',
+
+ # utf_32 codec
+ 'u32' : 'utf_32',
+ 'utf32' : 'utf_32',
+
+ # utf_32_be codec
+ 'utf_32be' : 'utf_32_be',
+
+ # utf_32_le codec
+ 'utf_32le' : 'utf_32_le',
+
+ # utf_7 codec
+ 'u7' : 'utf_7',
+ 'utf7' : 'utf_7',
+ 'unicode_1_1_utf_7' : 'utf_7',
+
+ # utf_8 codec
+ 'u8' : 'utf_8',
+ 'utf' : 'utf_8',
+ 'utf8' : 'utf_8',
+ 'utf8_ucs2' : 'utf_8',
+ 'utf8_ucs4' : 'utf_8',
+ 'cp65001' : 'utf_8',
+
+ # uu_codec codec
+ 'uu' : 'uu_codec',
+
+ # zlib_codec codec
+ 'zip' : 'zlib_codec',
+ 'zlib' : 'zlib_codec',
+
+ # temporary mac CJK aliases, will be replaced by proper codecs in 3.1
+ 'x_mac_japanese' : 'shift_jis',
+ 'x_mac_korean' : 'euc_kr',
+ 'x_mac_simp_chinese' : 'gb2312',
+ 'x_mac_trad_chinese' : 'big5',
+}
diff --git a/dist/lib/encodings/ascii.py b/dist/lib/encodings/ascii.py
new file mode 100644
index 0000000..2033cde
--- /dev/null
+++ b/dist/lib/encodings/ascii.py
@@ -0,0 +1,50 @@
+""" Python 'ascii' Codec
+
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+
+"""
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ # Note: Binding these as C functions will result in the class not
+ # converting them to methods. This is intended.
+ encode = codecs.ascii_encode
+ decode = codecs.ascii_decode
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.ascii_encode(input, self.errors)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.ascii_decode(input, self.errors)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+class StreamConverter(StreamWriter,StreamReader):
+
+ encode = codecs.ascii_decode
+ decode = codecs.ascii_encode
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='ascii',
+ encode=Codec.encode,
+ decode=Codec.decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamwriter=StreamWriter,
+ streamreader=StreamReader,
+ )
diff --git a/dist/lib/encodings/base64_codec.py b/dist/lib/encodings/base64_codec.py
new file mode 100644
index 0000000..8e7703b
--- /dev/null
+++ b/dist/lib/encodings/base64_codec.py
@@ -0,0 +1,55 @@
+"""Python 'base64_codec' Codec - base64 content transfer encoding.
+
+This codec de/encodes from bytes to bytes.
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+"""
+
+import codecs
+import base64
+
+### Codec APIs
+
+def base64_encode(input, errors='strict'):
+ assert errors == 'strict'
+ return (base64.encodebytes(input), len(input))
+
+def base64_decode(input, errors='strict'):
+ assert errors == 'strict'
+ return (base64.decodebytes(input), len(input))
+
+class Codec(codecs.Codec):
+ def encode(self, input, errors='strict'):
+ return base64_encode(input, errors)
+ def decode(self, input, errors='strict'):
+ return base64_decode(input, errors)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ assert self.errors == 'strict'
+ return base64.encodebytes(input)
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ assert self.errors == 'strict'
+ return base64.decodebytes(input)
+
+class StreamWriter(Codec, codecs.StreamWriter):
+ charbuffertype = bytes
+
+class StreamReader(Codec, codecs.StreamReader):
+ charbuffertype = bytes
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='base64',
+ encode=base64_encode,
+ decode=base64_decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamwriter=StreamWriter,
+ streamreader=StreamReader,
+ _is_text_encoding=False,
+ )
diff --git a/dist/lib/encodings/big5.py b/dist/lib/encodings/big5.py
new file mode 100644
index 0000000..7adeb0e
--- /dev/null
+++ b/dist/lib/encodings/big5.py
@@ -0,0 +1,39 @@
+#
+# big5.py: Python Unicode Codec for BIG5
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_tw, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_tw.getcodec('big5')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='big5',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/big5hkscs.py b/dist/lib/encodings/big5hkscs.py
new file mode 100644
index 0000000..350df37
--- /dev/null
+++ b/dist/lib/encodings/big5hkscs.py
@@ -0,0 +1,39 @@
+#
+# big5hkscs.py: Python Unicode Codec for BIG5HKSCS
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_hk, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_hk.getcodec('big5hkscs')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='big5hkscs',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/bz2_codec.py b/dist/lib/encodings/bz2_codec.py
new file mode 100644
index 0000000..fd9495e
--- /dev/null
+++ b/dist/lib/encodings/bz2_codec.py
@@ -0,0 +1,78 @@
+"""Python 'bz2_codec' Codec - bz2 compression encoding.
+
+This codec de/encodes from bytes to bytes and is therefore usable with
+bytes.transform() and bytes.untransform().
+
+Adapted by Raymond Hettinger from zlib_codec.py which was written
+by Marc-Andre Lemburg (mal@lemburg.com).
+"""
+
+import codecs
+import bz2 # this codec needs the optional bz2 module !
+
+### Codec APIs
+
+def bz2_encode(input, errors='strict'):
+ assert errors == 'strict'
+ return (bz2.compress(input), len(input))
+
+def bz2_decode(input, errors='strict'):
+ assert errors == 'strict'
+ return (bz2.decompress(input), len(input))
+
+class Codec(codecs.Codec):
+ def encode(self, input, errors='strict'):
+ return bz2_encode(input, errors)
+ def decode(self, input, errors='strict'):
+ return bz2_decode(input, errors)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def __init__(self, errors='strict'):
+ assert errors == 'strict'
+ self.errors = errors
+ self.compressobj = bz2.BZ2Compressor()
+
+ def encode(self, input, final=False):
+ if final:
+ c = self.compressobj.compress(input)
+ return c + self.compressobj.flush()
+ else:
+ return self.compressobj.compress(input)
+
+ def reset(self):
+ self.compressobj = bz2.BZ2Compressor()
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def __init__(self, errors='strict'):
+ assert errors == 'strict'
+ self.errors = errors
+ self.decompressobj = bz2.BZ2Decompressor()
+
+ def decode(self, input, final=False):
+ try:
+ return self.decompressobj.decompress(input)
+ except EOFError:
+ return ''
+
+ def reset(self):
+ self.decompressobj = bz2.BZ2Decompressor()
+
+class StreamWriter(Codec, codecs.StreamWriter):
+ charbuffertype = bytes
+
+class StreamReader(Codec, codecs.StreamReader):
+ charbuffertype = bytes
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name="bz2",
+ encode=bz2_encode,
+ decode=bz2_decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamwriter=StreamWriter,
+ streamreader=StreamReader,
+ _is_text_encoding=False,
+ )
diff --git a/dist/lib/encodings/charmap.py b/dist/lib/encodings/charmap.py
new file mode 100644
index 0000000..81189b1
--- /dev/null
+++ b/dist/lib/encodings/charmap.py
@@ -0,0 +1,69 @@
+""" Generic Python Character Mapping Codec.
+
+ Use this codec directly rather than through the automatic
+ conversion mechanisms supplied by unicode() and .encode().
+
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ # Note: Binding these as C functions will result in the class not
+ # converting them to methods. This is intended.
+ encode = codecs.charmap_encode
+ decode = codecs.charmap_decode
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def __init__(self, errors='strict', mapping=None):
+ codecs.IncrementalEncoder.__init__(self, errors)
+ self.mapping = mapping
+
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input, self.errors, self.mapping)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def __init__(self, errors='strict', mapping=None):
+ codecs.IncrementalDecoder.__init__(self, errors)
+ self.mapping = mapping
+
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input, self.errors, self.mapping)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+
+ def __init__(self,stream,errors='strict',mapping=None):
+ codecs.StreamWriter.__init__(self,stream,errors)
+ self.mapping = mapping
+
+ def encode(self,input,errors='strict'):
+ return Codec.encode(input,errors,self.mapping)
+
+class StreamReader(Codec,codecs.StreamReader):
+
+ def __init__(self,stream,errors='strict',mapping=None):
+ codecs.StreamReader.__init__(self,stream,errors)
+ self.mapping = mapping
+
+ def decode(self,input,errors='strict'):
+ return Codec.decode(input,errors,self.mapping)
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='charmap',
+ encode=Codec.encode,
+ decode=Codec.decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamwriter=StreamWriter,
+ streamreader=StreamReader,
+ )
diff --git a/dist/lib/encodings/cp037.py b/dist/lib/encodings/cp037.py
new file mode 100644
index 0000000..4edd708
--- /dev/null
+++ b/dist/lib/encodings/cp037.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp037 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp037',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x9c' # 0x04 -> CONTROL
+ '\t' # 0x05 -> HORIZONTAL TABULATION
+ '\x86' # 0x06 -> CONTROL
+ '\x7f' # 0x07 -> DELETE
+ '\x97' # 0x08 -> CONTROL
+ '\x8d' # 0x09 -> CONTROL
+ '\x8e' # 0x0A -> CONTROL
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x9d' # 0x14 -> CONTROL
+ '\x85' # 0x15 -> CONTROL
+ '\x08' # 0x16 -> BACKSPACE
+ '\x87' # 0x17 -> CONTROL
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x92' # 0x1A -> CONTROL
+ '\x8f' # 0x1B -> CONTROL
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ '\x80' # 0x20 -> CONTROL
+ '\x81' # 0x21 -> CONTROL
+ '\x82' # 0x22 -> CONTROL
+ '\x83' # 0x23 -> CONTROL
+ '\x84' # 0x24 -> CONTROL
+ '\n' # 0x25 -> LINE FEED
+ '\x17' # 0x26 -> END OF TRANSMISSION BLOCK
+ '\x1b' # 0x27 -> ESCAPE
+ '\x88' # 0x28 -> CONTROL
+ '\x89' # 0x29 -> CONTROL
+ '\x8a' # 0x2A -> CONTROL
+ '\x8b' # 0x2B -> CONTROL
+ '\x8c' # 0x2C -> CONTROL
+ '\x05' # 0x2D -> ENQUIRY
+ '\x06' # 0x2E -> ACKNOWLEDGE
+ '\x07' # 0x2F -> BELL
+ '\x90' # 0x30 -> CONTROL
+ '\x91' # 0x31 -> CONTROL
+ '\x16' # 0x32 -> SYNCHRONOUS IDLE
+ '\x93' # 0x33 -> CONTROL
+ '\x94' # 0x34 -> CONTROL
+ '\x95' # 0x35 -> CONTROL
+ '\x96' # 0x36 -> CONTROL
+ '\x04' # 0x37 -> END OF TRANSMISSION
+ '\x98' # 0x38 -> CONTROL
+ '\x99' # 0x39 -> CONTROL
+ '\x9a' # 0x3A -> CONTROL
+ '\x9b' # 0x3B -> CONTROL
+ '\x14' # 0x3C -> DEVICE CONTROL FOUR
+ '\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE
+ '\x9e' # 0x3E -> CONTROL
+ '\x1a' # 0x3F -> SUBSTITUTE
+ ' ' # 0x40 -> SPACE
+ '\xa0' # 0x41 -> NO-BREAK SPACE
+ '\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE
+ '\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE
+ '\xa2' # 0x4A -> CENT SIGN
+ '.' # 0x4B -> FULL STOP
+ '<' # 0x4C -> LESS-THAN SIGN
+ '(' # 0x4D -> LEFT PARENTHESIS
+ '+' # 0x4E -> PLUS SIGN
+ '|' # 0x4F -> VERTICAL LINE
+ '&' # 0x50 -> AMPERSAND
+ '\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE
+ '\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE
+ '\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE
+ '\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
+ '!' # 0x5A -> EXCLAMATION MARK
+ '$' # 0x5B -> DOLLAR SIGN
+ '*' # 0x5C -> ASTERISK
+ ')' # 0x5D -> RIGHT PARENTHESIS
+ ';' # 0x5E -> SEMICOLON
+ '\xac' # 0x5F -> NOT SIGN
+ '-' # 0x60 -> HYPHEN-MINUS
+ '/' # 0x61 -> SOLIDUS
+ '\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ '\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
+ '\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE
+ '\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE
+ '\xa6' # 0x6A -> BROKEN BAR
+ ',' # 0x6B -> COMMA
+ '%' # 0x6C -> PERCENT SIGN
+ '_' # 0x6D -> LOW LINE
+ '>' # 0x6E -> GREATER-THAN SIGN
+ '?' # 0x6F -> QUESTION MARK
+ '\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE
+ '\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ '\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ '\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
+ '\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ '\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ '\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
+ '`' # 0x79 -> GRAVE ACCENT
+ ':' # 0x7A -> COLON
+ '#' # 0x7B -> NUMBER SIGN
+ '@' # 0x7C -> COMMERCIAL AT
+ "'" # 0x7D -> APOSTROPHE
+ '=' # 0x7E -> EQUALS SIGN
+ '"' # 0x7F -> QUOTATION MARK
+ '\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE
+ 'a' # 0x81 -> LATIN SMALL LETTER A
+ 'b' # 0x82 -> LATIN SMALL LETTER B
+ 'c' # 0x83 -> LATIN SMALL LETTER C
+ 'd' # 0x84 -> LATIN SMALL LETTER D
+ 'e' # 0x85 -> LATIN SMALL LETTER E
+ 'f' # 0x86 -> LATIN SMALL LETTER F
+ 'g' # 0x87 -> LATIN SMALL LETTER G
+ 'h' # 0x88 -> LATIN SMALL LETTER H
+ 'i' # 0x89 -> LATIN SMALL LETTER I
+ '\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC)
+ '\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE
+ '\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC)
+ '\xb1' # 0x8F -> PLUS-MINUS SIGN
+ '\xb0' # 0x90 -> DEGREE SIGN
+ 'j' # 0x91 -> LATIN SMALL LETTER J
+ 'k' # 0x92 -> LATIN SMALL LETTER K
+ 'l' # 0x93 -> LATIN SMALL LETTER L
+ 'm' # 0x94 -> LATIN SMALL LETTER M
+ 'n' # 0x95 -> LATIN SMALL LETTER N
+ 'o' # 0x96 -> LATIN SMALL LETTER O
+ 'p' # 0x97 -> LATIN SMALL LETTER P
+ 'q' # 0x98 -> LATIN SMALL LETTER Q
+ 'r' # 0x99 -> LATIN SMALL LETTER R
+ '\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR
+ '\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR
+ '\xe6' # 0x9C -> LATIN SMALL LIGATURE AE
+ '\xb8' # 0x9D -> CEDILLA
+ '\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE
+ '\xa4' # 0x9F -> CURRENCY SIGN
+ '\xb5' # 0xA0 -> MICRO SIGN
+ '~' # 0xA1 -> TILDE
+ 's' # 0xA2 -> LATIN SMALL LETTER S
+ 't' # 0xA3 -> LATIN SMALL LETTER T
+ 'u' # 0xA4 -> LATIN SMALL LETTER U
+ 'v' # 0xA5 -> LATIN SMALL LETTER V
+ 'w' # 0xA6 -> LATIN SMALL LETTER W
+ 'x' # 0xA7 -> LATIN SMALL LETTER X
+ 'y' # 0xA8 -> LATIN SMALL LETTER Y
+ 'z' # 0xA9 -> LATIN SMALL LETTER Z
+ '\xa1' # 0xAA -> INVERTED EXCLAMATION MARK
+ '\xbf' # 0xAB -> INVERTED QUESTION MARK
+ '\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC)
+ '\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE
+ '\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC)
+ '\xae' # 0xAF -> REGISTERED SIGN
+ '^' # 0xB0 -> CIRCUMFLEX ACCENT
+ '\xa3' # 0xB1 -> POUND SIGN
+ '\xa5' # 0xB2 -> YEN SIGN
+ '\xb7' # 0xB3 -> MIDDLE DOT
+ '\xa9' # 0xB4 -> COPYRIGHT SIGN
+ '\xa7' # 0xB5 -> SECTION SIGN
+ '\xb6' # 0xB6 -> PILCROW SIGN
+ '\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER
+ '\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF
+ '\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS
+ '[' # 0xBA -> LEFT SQUARE BRACKET
+ ']' # 0xBB -> RIGHT SQUARE BRACKET
+ '\xaf' # 0xBC -> MACRON
+ '\xa8' # 0xBD -> DIAERESIS
+ '\xb4' # 0xBE -> ACUTE ACCENT
+ '\xd7' # 0xBF -> MULTIPLICATION SIGN
+ '{' # 0xC0 -> LEFT CURLY BRACKET
+ 'A' # 0xC1 -> LATIN CAPITAL LETTER A
+ 'B' # 0xC2 -> LATIN CAPITAL LETTER B
+ 'C' # 0xC3 -> LATIN CAPITAL LETTER C
+ 'D' # 0xC4 -> LATIN CAPITAL LETTER D
+ 'E' # 0xC5 -> LATIN CAPITAL LETTER E
+ 'F' # 0xC6 -> LATIN CAPITAL LETTER F
+ 'G' # 0xC7 -> LATIN CAPITAL LETTER G
+ 'H' # 0xC8 -> LATIN CAPITAL LETTER H
+ 'I' # 0xC9 -> LATIN CAPITAL LETTER I
+ '\xad' # 0xCA -> SOFT HYPHEN
+ '\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE
+ '\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE
+ '\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE
+ '}' # 0xD0 -> RIGHT CURLY BRACKET
+ 'J' # 0xD1 -> LATIN CAPITAL LETTER J
+ 'K' # 0xD2 -> LATIN CAPITAL LETTER K
+ 'L' # 0xD3 -> LATIN CAPITAL LETTER L
+ 'M' # 0xD4 -> LATIN CAPITAL LETTER M
+ 'N' # 0xD5 -> LATIN CAPITAL LETTER N
+ 'O' # 0xD6 -> LATIN CAPITAL LETTER O
+ 'P' # 0xD7 -> LATIN CAPITAL LETTER P
+ 'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
+ 'R' # 0xD9 -> LATIN CAPITAL LETTER R
+ '\xb9' # 0xDA -> SUPERSCRIPT ONE
+ '\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE
+ '\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE
+ '\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
+ '\\' # 0xE0 -> REVERSE SOLIDUS
+ '\xf7' # 0xE1 -> DIVISION SIGN
+ 'S' # 0xE2 -> LATIN CAPITAL LETTER S
+ 'T' # 0xE3 -> LATIN CAPITAL LETTER T
+ 'U' # 0xE4 -> LATIN CAPITAL LETTER U
+ 'V' # 0xE5 -> LATIN CAPITAL LETTER V
+ 'W' # 0xE6 -> LATIN CAPITAL LETTER W
+ 'X' # 0xE7 -> LATIN CAPITAL LETTER X
+ 'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
+ '\xb2' # 0xEA -> SUPERSCRIPT TWO
+ '\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE
+ '\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE
+ '0' # 0xF0 -> DIGIT ZERO
+ '1' # 0xF1 -> DIGIT ONE
+ '2' # 0xF2 -> DIGIT TWO
+ '3' # 0xF3 -> DIGIT THREE
+ '4' # 0xF4 -> DIGIT FOUR
+ '5' # 0xF5 -> DIGIT FIVE
+ '6' # 0xF6 -> DIGIT SIX
+ '7' # 0xF7 -> DIGIT SEVEN
+ '8' # 0xF8 -> DIGIT EIGHT
+ '9' # 0xF9 -> DIGIT NINE
+ '\xb3' # 0xFA -> SUPERSCRIPT THREE
+ '\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ '\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
+ '\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\x9f' # 0xFF -> CONTROL
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp1006.py b/dist/lib/encodings/cp1006.py
new file mode 100644
index 0000000..a1221c3
--- /dev/null
+++ b/dist/lib/encodings/cp1006.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1006 generated from 'MAPPINGS/VENDORS/MISC/CP1006.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp1006',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x04' # 0x04 -> END OF TRANSMISSION
+ '\x05' # 0x05 -> ENQUIRY
+ '\x06' # 0x06 -> ACKNOWLEDGE
+ '\x07' # 0x07 -> BELL
+ '\x08' # 0x08 -> BACKSPACE
+ '\t' # 0x09 -> HORIZONTAL TABULATION
+ '\n' # 0x0A -> LINE FEED
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x14' # 0x14 -> DEVICE CONTROL FOUR
+ '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x16 -> SYNCHRONOUS IDLE
+ '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x1a' # 0x1A -> SUBSTITUTE
+ '\x1b' # 0x1B -> ESCAPE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> DELETE
+ '\x80' # 0x80 ->
+ '\x81' # 0x81 ->
+ '\x82' # 0x82 ->
+ '\x83' # 0x83 ->
+ '\x84' # 0x84 ->
+ '\x85' # 0x85 ->
+ '\x86' # 0x86 ->
+ '\x87' # 0x87 ->
+ '\x88' # 0x88 ->
+ '\x89' # 0x89 ->
+ '\x8a' # 0x8A ->
+ '\x8b' # 0x8B ->
+ '\x8c' # 0x8C ->
+ '\x8d' # 0x8D ->
+ '\x8e' # 0x8E ->
+ '\x8f' # 0x8F ->
+ '\x90' # 0x90 ->
+ '\x91' # 0x91 ->
+ '\x92' # 0x92 ->
+ '\x93' # 0x93 ->
+ '\x94' # 0x94 ->
+ '\x95' # 0x95 ->
+ '\x96' # 0x96 ->
+ '\x97' # 0x97 ->
+ '\x98' # 0x98 ->
+ '\x99' # 0x99 ->
+ '\x9a' # 0x9A ->
+ '\x9b' # 0x9B ->
+ '\x9c' # 0x9C ->
+ '\x9d' # 0x9D ->
+ '\x9e' # 0x9E ->
+ '\x9f' # 0x9F ->
+ '\xa0' # 0xA0 -> NO-BREAK SPACE
+ '\u06f0' # 0xA1 -> EXTENDED ARABIC-INDIC DIGIT ZERO
+ '\u06f1' # 0xA2 -> EXTENDED ARABIC-INDIC DIGIT ONE
+ '\u06f2' # 0xA3 -> EXTENDED ARABIC-INDIC DIGIT TWO
+ '\u06f3' # 0xA4 -> EXTENDED ARABIC-INDIC DIGIT THREE
+ '\u06f4' # 0xA5 -> EXTENDED ARABIC-INDIC DIGIT FOUR
+ '\u06f5' # 0xA6 -> EXTENDED ARABIC-INDIC DIGIT FIVE
+ '\u06f6' # 0xA7 -> EXTENDED ARABIC-INDIC DIGIT SIX
+ '\u06f7' # 0xA8 -> EXTENDED ARABIC-INDIC DIGIT SEVEN
+ '\u06f8' # 0xA9 -> EXTENDED ARABIC-INDIC DIGIT EIGHT
+ '\u06f9' # 0xAA -> EXTENDED ARABIC-INDIC DIGIT NINE
+ '\u060c' # 0xAB -> ARABIC COMMA
+ '\u061b' # 0xAC -> ARABIC SEMICOLON
+ '\xad' # 0xAD -> SOFT HYPHEN
+ '\u061f' # 0xAE -> ARABIC QUESTION MARK
+ '\ufe81' # 0xAF -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
+ '\ufe8d' # 0xB0 -> ARABIC LETTER ALEF ISOLATED FORM
+ '\ufe8e' # 0xB1 -> ARABIC LETTER ALEF FINAL FORM
+ '\ufe8e' # 0xB2 -> ARABIC LETTER ALEF FINAL FORM
+ '\ufe8f' # 0xB3 -> ARABIC LETTER BEH ISOLATED FORM
+ '\ufe91' # 0xB4 -> ARABIC LETTER BEH INITIAL FORM
+ '\ufb56' # 0xB5 -> ARABIC LETTER PEH ISOLATED FORM
+ '\ufb58' # 0xB6 -> ARABIC LETTER PEH INITIAL FORM
+ '\ufe93' # 0xB7 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM
+ '\ufe95' # 0xB8 -> ARABIC LETTER TEH ISOLATED FORM
+ '\ufe97' # 0xB9 -> ARABIC LETTER TEH INITIAL FORM
+ '\ufb66' # 0xBA -> ARABIC LETTER TTEH ISOLATED FORM
+ '\ufb68' # 0xBB -> ARABIC LETTER TTEH INITIAL FORM
+ '\ufe99' # 0xBC -> ARABIC LETTER THEH ISOLATED FORM
+ '\ufe9b' # 0xBD -> ARABIC LETTER THEH INITIAL FORM
+ '\ufe9d' # 0xBE -> ARABIC LETTER JEEM ISOLATED FORM
+ '\ufe9f' # 0xBF -> ARABIC LETTER JEEM INITIAL FORM
+ '\ufb7a' # 0xC0 -> ARABIC LETTER TCHEH ISOLATED FORM
+ '\ufb7c' # 0xC1 -> ARABIC LETTER TCHEH INITIAL FORM
+ '\ufea1' # 0xC2 -> ARABIC LETTER HAH ISOLATED FORM
+ '\ufea3' # 0xC3 -> ARABIC LETTER HAH INITIAL FORM
+ '\ufea5' # 0xC4 -> ARABIC LETTER KHAH ISOLATED FORM
+ '\ufea7' # 0xC5 -> ARABIC LETTER KHAH INITIAL FORM
+ '\ufea9' # 0xC6 -> ARABIC LETTER DAL ISOLATED FORM
+ '\ufb84' # 0xC7 -> ARABIC LETTER DAHAL ISOLATED FORMN
+ '\ufeab' # 0xC8 -> ARABIC LETTER THAL ISOLATED FORM
+ '\ufead' # 0xC9 -> ARABIC LETTER REH ISOLATED FORM
+ '\ufb8c' # 0xCA -> ARABIC LETTER RREH ISOLATED FORM
+ '\ufeaf' # 0xCB -> ARABIC LETTER ZAIN ISOLATED FORM
+ '\ufb8a' # 0xCC -> ARABIC LETTER JEH ISOLATED FORM
+ '\ufeb1' # 0xCD -> ARABIC LETTER SEEN ISOLATED FORM
+ '\ufeb3' # 0xCE -> ARABIC LETTER SEEN INITIAL FORM
+ '\ufeb5' # 0xCF -> ARABIC LETTER SHEEN ISOLATED FORM
+ '\ufeb7' # 0xD0 -> ARABIC LETTER SHEEN INITIAL FORM
+ '\ufeb9' # 0xD1 -> ARABIC LETTER SAD ISOLATED FORM
+ '\ufebb' # 0xD2 -> ARABIC LETTER SAD INITIAL FORM
+ '\ufebd' # 0xD3 -> ARABIC LETTER DAD ISOLATED FORM
+ '\ufebf' # 0xD4 -> ARABIC LETTER DAD INITIAL FORM
+ '\ufec1' # 0xD5 -> ARABIC LETTER TAH ISOLATED FORM
+ '\ufec5' # 0xD6 -> ARABIC LETTER ZAH ISOLATED FORM
+ '\ufec9' # 0xD7 -> ARABIC LETTER AIN ISOLATED FORM
+ '\ufeca' # 0xD8 -> ARABIC LETTER AIN FINAL FORM
+ '\ufecb' # 0xD9 -> ARABIC LETTER AIN INITIAL FORM
+ '\ufecc' # 0xDA -> ARABIC LETTER AIN MEDIAL FORM
+ '\ufecd' # 0xDB -> ARABIC LETTER GHAIN ISOLATED FORM
+ '\ufece' # 0xDC -> ARABIC LETTER GHAIN FINAL FORM
+ '\ufecf' # 0xDD -> ARABIC LETTER GHAIN INITIAL FORM
+ '\ufed0' # 0xDE -> ARABIC LETTER GHAIN MEDIAL FORM
+ '\ufed1' # 0xDF -> ARABIC LETTER FEH ISOLATED FORM
+ '\ufed3' # 0xE0 -> ARABIC LETTER FEH INITIAL FORM
+ '\ufed5' # 0xE1 -> ARABIC LETTER QAF ISOLATED FORM
+ '\ufed7' # 0xE2 -> ARABIC LETTER QAF INITIAL FORM
+ '\ufed9' # 0xE3 -> ARABIC LETTER KAF ISOLATED FORM
+ '\ufedb' # 0xE4 -> ARABIC LETTER KAF INITIAL FORM
+ '\ufb92' # 0xE5 -> ARABIC LETTER GAF ISOLATED FORM
+ '\ufb94' # 0xE6 -> ARABIC LETTER GAF INITIAL FORM
+ '\ufedd' # 0xE7 -> ARABIC LETTER LAM ISOLATED FORM
+ '\ufedf' # 0xE8 -> ARABIC LETTER LAM INITIAL FORM
+ '\ufee0' # 0xE9 -> ARABIC LETTER LAM MEDIAL FORM
+ '\ufee1' # 0xEA -> ARABIC LETTER MEEM ISOLATED FORM
+ '\ufee3' # 0xEB -> ARABIC LETTER MEEM INITIAL FORM
+ '\ufb9e' # 0xEC -> ARABIC LETTER NOON GHUNNA ISOLATED FORM
+ '\ufee5' # 0xED -> ARABIC LETTER NOON ISOLATED FORM
+ '\ufee7' # 0xEE -> ARABIC LETTER NOON INITIAL FORM
+ '\ufe85' # 0xEF -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
+ '\ufeed' # 0xF0 -> ARABIC LETTER WAW ISOLATED FORM
+ '\ufba6' # 0xF1 -> ARABIC LETTER HEH GOAL ISOLATED FORM
+ '\ufba8' # 0xF2 -> ARABIC LETTER HEH GOAL INITIAL FORM
+ '\ufba9' # 0xF3 -> ARABIC LETTER HEH GOAL MEDIAL FORM
+ '\ufbaa' # 0xF4 -> ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM
+ '\ufe80' # 0xF5 -> ARABIC LETTER HAMZA ISOLATED FORM
+ '\ufe89' # 0xF6 -> ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM
+ '\ufe8a' # 0xF7 -> ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM
+ '\ufe8b' # 0xF8 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
+ '\ufef1' # 0xF9 -> ARABIC LETTER YEH ISOLATED FORM
+ '\ufef2' # 0xFA -> ARABIC LETTER YEH FINAL FORM
+ '\ufef3' # 0xFB -> ARABIC LETTER YEH INITIAL FORM
+ '\ufbb0' # 0xFC -> ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM
+ '\ufbae' # 0xFD -> ARABIC LETTER YEH BARREE ISOLATED FORM
+ '\ufe7c' # 0xFE -> ARABIC SHADDA ISOLATED FORM
+ '\ufe7d' # 0xFF -> ARABIC SHADDA MEDIAL FORM
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp1026.py b/dist/lib/encodings/cp1026.py
new file mode 100644
index 0000000..46f71f7
--- /dev/null
+++ b/dist/lib/encodings/cp1026.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1026 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp1026',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x9c' # 0x04 -> CONTROL
+ '\t' # 0x05 -> HORIZONTAL TABULATION
+ '\x86' # 0x06 -> CONTROL
+ '\x7f' # 0x07 -> DELETE
+ '\x97' # 0x08 -> CONTROL
+ '\x8d' # 0x09 -> CONTROL
+ '\x8e' # 0x0A -> CONTROL
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x9d' # 0x14 -> CONTROL
+ '\x85' # 0x15 -> CONTROL
+ '\x08' # 0x16 -> BACKSPACE
+ '\x87' # 0x17 -> CONTROL
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x92' # 0x1A -> CONTROL
+ '\x8f' # 0x1B -> CONTROL
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ '\x80' # 0x20 -> CONTROL
+ '\x81' # 0x21 -> CONTROL
+ '\x82' # 0x22 -> CONTROL
+ '\x83' # 0x23 -> CONTROL
+ '\x84' # 0x24 -> CONTROL
+ '\n' # 0x25 -> LINE FEED
+ '\x17' # 0x26 -> END OF TRANSMISSION BLOCK
+ '\x1b' # 0x27 -> ESCAPE
+ '\x88' # 0x28 -> CONTROL
+ '\x89' # 0x29 -> CONTROL
+ '\x8a' # 0x2A -> CONTROL
+ '\x8b' # 0x2B -> CONTROL
+ '\x8c' # 0x2C -> CONTROL
+ '\x05' # 0x2D -> ENQUIRY
+ '\x06' # 0x2E -> ACKNOWLEDGE
+ '\x07' # 0x2F -> BELL
+ '\x90' # 0x30 -> CONTROL
+ '\x91' # 0x31 -> CONTROL
+ '\x16' # 0x32 -> SYNCHRONOUS IDLE
+ '\x93' # 0x33 -> CONTROL
+ '\x94' # 0x34 -> CONTROL
+ '\x95' # 0x35 -> CONTROL
+ '\x96' # 0x36 -> CONTROL
+ '\x04' # 0x37 -> END OF TRANSMISSION
+ '\x98' # 0x38 -> CONTROL
+ '\x99' # 0x39 -> CONTROL
+ '\x9a' # 0x3A -> CONTROL
+ '\x9b' # 0x3B -> CONTROL
+ '\x14' # 0x3C -> DEVICE CONTROL FOUR
+ '\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE
+ '\x9e' # 0x3E -> CONTROL
+ '\x1a' # 0x3F -> SUBSTITUTE
+ ' ' # 0x40 -> SPACE
+ '\xa0' # 0x41 -> NO-BREAK SPACE
+ '\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE
+ '\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '{' # 0x48 -> LEFT CURLY BRACKET
+ '\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE
+ '\xc7' # 0x4A -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '.' # 0x4B -> FULL STOP
+ '<' # 0x4C -> LESS-THAN SIGN
+ '(' # 0x4D -> LEFT PARENTHESIS
+ '+' # 0x4E -> PLUS SIGN
+ '!' # 0x4F -> EXCLAMATION MARK
+ '&' # 0x50 -> AMPERSAND
+ '\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE
+ '\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE
+ '\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE
+ '\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
+ '\u011e' # 0x5A -> LATIN CAPITAL LETTER G WITH BREVE
+ '\u0130' # 0x5B -> LATIN CAPITAL LETTER I WITH DOT ABOVE
+ '*' # 0x5C -> ASTERISK
+ ')' # 0x5D -> RIGHT PARENTHESIS
+ ';' # 0x5E -> SEMICOLON
+ '^' # 0x5F -> CIRCUMFLEX ACCENT
+ '-' # 0x60 -> HYPHEN-MINUS
+ '/' # 0x61 -> SOLIDUS
+ '\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ '\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
+ '\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE
+ '\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '[' # 0x68 -> LEFT SQUARE BRACKET
+ '\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE
+ '\u015f' # 0x6A -> LATIN SMALL LETTER S WITH CEDILLA
+ ',' # 0x6B -> COMMA
+ '%' # 0x6C -> PERCENT SIGN
+ '_' # 0x6D -> LOW LINE
+ '>' # 0x6E -> GREATER-THAN SIGN
+ '?' # 0x6F -> QUESTION MARK
+ '\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE
+ '\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ '\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ '\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
+ '\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ '\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ '\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
+ '\u0131' # 0x79 -> LATIN SMALL LETTER DOTLESS I
+ ':' # 0x7A -> COLON
+ '\xd6' # 0x7B -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\u015e' # 0x7C -> LATIN CAPITAL LETTER S WITH CEDILLA
+ "'" # 0x7D -> APOSTROPHE
+ '=' # 0x7E -> EQUALS SIGN
+ '\xdc' # 0x7F -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE
+ 'a' # 0x81 -> LATIN SMALL LETTER A
+ 'b' # 0x82 -> LATIN SMALL LETTER B
+ 'c' # 0x83 -> LATIN SMALL LETTER C
+ 'd' # 0x84 -> LATIN SMALL LETTER D
+ 'e' # 0x85 -> LATIN SMALL LETTER E
+ 'f' # 0x86 -> LATIN SMALL LETTER F
+ 'g' # 0x87 -> LATIN SMALL LETTER G
+ 'h' # 0x88 -> LATIN SMALL LETTER H
+ 'i' # 0x89 -> LATIN SMALL LETTER I
+ '\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '}' # 0x8C -> RIGHT CURLY BRACKET
+ '`' # 0x8D -> GRAVE ACCENT
+ '\xa6' # 0x8E -> BROKEN BAR
+ '\xb1' # 0x8F -> PLUS-MINUS SIGN
+ '\xb0' # 0x90 -> DEGREE SIGN
+ 'j' # 0x91 -> LATIN SMALL LETTER J
+ 'k' # 0x92 -> LATIN SMALL LETTER K
+ 'l' # 0x93 -> LATIN SMALL LETTER L
+ 'm' # 0x94 -> LATIN SMALL LETTER M
+ 'n' # 0x95 -> LATIN SMALL LETTER N
+ 'o' # 0x96 -> LATIN SMALL LETTER O
+ 'p' # 0x97 -> LATIN SMALL LETTER P
+ 'q' # 0x98 -> LATIN SMALL LETTER Q
+ 'r' # 0x99 -> LATIN SMALL LETTER R
+ '\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR
+ '\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR
+ '\xe6' # 0x9C -> LATIN SMALL LIGATURE AE
+ '\xb8' # 0x9D -> CEDILLA
+ '\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE
+ '\xa4' # 0x9F -> CURRENCY SIGN
+ '\xb5' # 0xA0 -> MICRO SIGN
+ '\xf6' # 0xA1 -> LATIN SMALL LETTER O WITH DIAERESIS
+ 's' # 0xA2 -> LATIN SMALL LETTER S
+ 't' # 0xA3 -> LATIN SMALL LETTER T
+ 'u' # 0xA4 -> LATIN SMALL LETTER U
+ 'v' # 0xA5 -> LATIN SMALL LETTER V
+ 'w' # 0xA6 -> LATIN SMALL LETTER W
+ 'x' # 0xA7 -> LATIN SMALL LETTER X
+ 'y' # 0xA8 -> LATIN SMALL LETTER Y
+ 'z' # 0xA9 -> LATIN SMALL LETTER Z
+ '\xa1' # 0xAA -> INVERTED EXCLAMATION MARK
+ '\xbf' # 0xAB -> INVERTED QUESTION MARK
+ ']' # 0xAC -> RIGHT SQUARE BRACKET
+ '$' # 0xAD -> DOLLAR SIGN
+ '@' # 0xAE -> COMMERCIAL AT
+ '\xae' # 0xAF -> REGISTERED SIGN
+ '\xa2' # 0xB0 -> CENT SIGN
+ '\xa3' # 0xB1 -> POUND SIGN
+ '\xa5' # 0xB2 -> YEN SIGN
+ '\xb7' # 0xB3 -> MIDDLE DOT
+ '\xa9' # 0xB4 -> COPYRIGHT SIGN
+ '\xa7' # 0xB5 -> SECTION SIGN
+ '\xb6' # 0xB6 -> PILCROW SIGN
+ '\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER
+ '\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF
+ '\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS
+ '\xac' # 0xBA -> NOT SIGN
+ '|' # 0xBB -> VERTICAL LINE
+ '\xaf' # 0xBC -> MACRON
+ '\xa8' # 0xBD -> DIAERESIS
+ '\xb4' # 0xBE -> ACUTE ACCENT
+ '\xd7' # 0xBF -> MULTIPLICATION SIGN
+ '\xe7' # 0xC0 -> LATIN SMALL LETTER C WITH CEDILLA
+ 'A' # 0xC1 -> LATIN CAPITAL LETTER A
+ 'B' # 0xC2 -> LATIN CAPITAL LETTER B
+ 'C' # 0xC3 -> LATIN CAPITAL LETTER C
+ 'D' # 0xC4 -> LATIN CAPITAL LETTER D
+ 'E' # 0xC5 -> LATIN CAPITAL LETTER E
+ 'F' # 0xC6 -> LATIN CAPITAL LETTER F
+ 'G' # 0xC7 -> LATIN CAPITAL LETTER G
+ 'H' # 0xC8 -> LATIN CAPITAL LETTER H
+ 'I' # 0xC9 -> LATIN CAPITAL LETTER I
+ '\xad' # 0xCA -> SOFT HYPHEN
+ '\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '~' # 0xCC -> TILDE
+ '\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE
+ '\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE
+ '\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE
+ '\u011f' # 0xD0 -> LATIN SMALL LETTER G WITH BREVE
+ 'J' # 0xD1 -> LATIN CAPITAL LETTER J
+ 'K' # 0xD2 -> LATIN CAPITAL LETTER K
+ 'L' # 0xD3 -> LATIN CAPITAL LETTER L
+ 'M' # 0xD4 -> LATIN CAPITAL LETTER M
+ 'N' # 0xD5 -> LATIN CAPITAL LETTER N
+ 'O' # 0xD6 -> LATIN CAPITAL LETTER O
+ 'P' # 0xD7 -> LATIN CAPITAL LETTER P
+ 'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
+ 'R' # 0xD9 -> LATIN CAPITAL LETTER R
+ '\xb9' # 0xDA -> SUPERSCRIPT ONE
+ '\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\\' # 0xDC -> REVERSE SOLIDUS
+ '\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE
+ '\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE
+ '\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
+ '\xfc' # 0xE0 -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xf7' # 0xE1 -> DIVISION SIGN
+ 'S' # 0xE2 -> LATIN CAPITAL LETTER S
+ 'T' # 0xE3 -> LATIN CAPITAL LETTER T
+ 'U' # 0xE4 -> LATIN CAPITAL LETTER U
+ 'V' # 0xE5 -> LATIN CAPITAL LETTER V
+ 'W' # 0xE6 -> LATIN CAPITAL LETTER W
+ 'X' # 0xE7 -> LATIN CAPITAL LETTER X
+ 'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
+ '\xb2' # 0xEA -> SUPERSCRIPT TWO
+ '\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '#' # 0xEC -> NUMBER SIGN
+ '\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE
+ '\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE
+ '0' # 0xF0 -> DIGIT ZERO
+ '1' # 0xF1 -> DIGIT ONE
+ '2' # 0xF2 -> DIGIT TWO
+ '3' # 0xF3 -> DIGIT THREE
+ '4' # 0xF4 -> DIGIT FOUR
+ '5' # 0xF5 -> DIGIT FIVE
+ '6' # 0xF6 -> DIGIT SIX
+ '7' # 0xF7 -> DIGIT SEVEN
+ '8' # 0xF8 -> DIGIT EIGHT
+ '9' # 0xF9 -> DIGIT NINE
+ '\xb3' # 0xFA -> SUPERSCRIPT THREE
+ '\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ '"' # 0xFC -> QUOTATION MARK
+ '\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
+ '\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\x9f' # 0xFF -> CONTROL
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp1125.py b/dist/lib/encodings/cp1125.py
new file mode 100644
index 0000000..b1fd69d
--- /dev/null
+++ b/dist/lib/encodings/cp1125.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec for CP1125
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp1125',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
+ 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
+ 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
+ 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE
+ 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE
+ 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE
+ 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE
+ 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE
+ 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I
+ 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I
+ 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA
+ 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL
+ 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM
+ 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN
+ 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O
+ 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE
+ 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER
+ 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES
+ 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE
+ 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U
+ 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF
+ 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA
+ 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE
+ 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE
+ 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA
+ 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
+ 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
+ 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU
+ 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN
+ 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E
+ 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU
+ 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA
+ 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A
+ 0x00a1: 0x0431, # CYRILLIC SMALL LETTER BE
+ 0x00a2: 0x0432, # CYRILLIC SMALL LETTER VE
+ 0x00a3: 0x0433, # CYRILLIC SMALL LETTER GHE
+ 0x00a4: 0x0434, # CYRILLIC SMALL LETTER DE
+ 0x00a5: 0x0435, # CYRILLIC SMALL LETTER IE
+ 0x00a6: 0x0436, # CYRILLIC SMALL LETTER ZHE
+ 0x00a7: 0x0437, # CYRILLIC SMALL LETTER ZE
+ 0x00a8: 0x0438, # CYRILLIC SMALL LETTER I
+ 0x00a9: 0x0439, # CYRILLIC SMALL LETTER SHORT I
+ 0x00aa: 0x043a, # CYRILLIC SMALL LETTER KA
+ 0x00ab: 0x043b, # CYRILLIC SMALL LETTER EL
+ 0x00ac: 0x043c, # CYRILLIC SMALL LETTER EM
+ 0x00ad: 0x043d, # CYRILLIC SMALL LETTER EN
+ 0x00ae: 0x043e, # CYRILLIC SMALL LETTER O
+ 0x00af: 0x043f, # CYRILLIC SMALL LETTER PE
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x258c, # LEFT HALF BLOCK
+ 0x00de: 0x2590, # RIGHT HALF BLOCK
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER
+ 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES
+ 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE
+ 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U
+ 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF
+ 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA
+ 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE
+ 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE
+ 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA
+ 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA
+ 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN
+ 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU
+ 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN
+ 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E
+ 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU
+ 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA
+ 0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO
+ 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO
+ 0x00f2: 0x0490, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+ 0x00f3: 0x0491, # CYRILLIC SMALL LETTER GHE WITH UPTURN
+ 0x00f4: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ 0x00f5: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
+ 0x00f6: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+ 0x00f7: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+ 0x00f8: 0x0407, # CYRILLIC CAPITAL LETTER YI
+ 0x00f9: 0x0457, # CYRILLIC SMALL LETTER YI
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x221a, # SQUARE ROOT
+ 0x00fc: 0x2116, # NUMERO SIGN
+ 0x00fd: 0x00a4, # CURRENCY SIGN
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x0000 -> NULL
+ '\x01' # 0x0001 -> START OF HEADING
+ '\x02' # 0x0002 -> START OF TEXT
+ '\x03' # 0x0003 -> END OF TEXT
+ '\x04' # 0x0004 -> END OF TRANSMISSION
+ '\x05' # 0x0005 -> ENQUIRY
+ '\x06' # 0x0006 -> ACKNOWLEDGE
+ '\x07' # 0x0007 -> BELL
+ '\x08' # 0x0008 -> BACKSPACE
+ '\t' # 0x0009 -> HORIZONTAL TABULATION
+ '\n' # 0x000a -> LINE FEED
+ '\x0b' # 0x000b -> VERTICAL TABULATION
+ '\x0c' # 0x000c -> FORM FEED
+ '\r' # 0x000d -> CARRIAGE RETURN
+ '\x0e' # 0x000e -> SHIFT OUT
+ '\x0f' # 0x000f -> SHIFT IN
+ '\x10' # 0x0010 -> DATA LINK ESCAPE
+ '\x11' # 0x0011 -> DEVICE CONTROL ONE
+ '\x12' # 0x0012 -> DEVICE CONTROL TWO
+ '\x13' # 0x0013 -> DEVICE CONTROL THREE
+ '\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ '\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ '\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x0018 -> CANCEL
+ '\x19' # 0x0019 -> END OF MEDIUM
+ '\x1a' # 0x001a -> SUBSTITUTE
+ '\x1b' # 0x001b -> ESCAPE
+ '\x1c' # 0x001c -> FILE SEPARATOR
+ '\x1d' # 0x001d -> GROUP SEPARATOR
+ '\x1e' # 0x001e -> RECORD SEPARATOR
+ '\x1f' # 0x001f -> UNIT SEPARATOR
+ ' ' # 0x0020 -> SPACE
+ '!' # 0x0021 -> EXCLAMATION MARK
+ '"' # 0x0022 -> QUOTATION MARK
+ '#' # 0x0023 -> NUMBER SIGN
+ '$' # 0x0024 -> DOLLAR SIGN
+ '%' # 0x0025 -> PERCENT SIGN
+ '&' # 0x0026 -> AMPERSAND
+ "'" # 0x0027 -> APOSTROPHE
+ '(' # 0x0028 -> LEFT PARENTHESIS
+ ')' # 0x0029 -> RIGHT PARENTHESIS
+ '*' # 0x002a -> ASTERISK
+ '+' # 0x002b -> PLUS SIGN
+ ',' # 0x002c -> COMMA
+ '-' # 0x002d -> HYPHEN-MINUS
+ '.' # 0x002e -> FULL STOP
+ '/' # 0x002f -> SOLIDUS
+ '0' # 0x0030 -> DIGIT ZERO
+ '1' # 0x0031 -> DIGIT ONE
+ '2' # 0x0032 -> DIGIT TWO
+ '3' # 0x0033 -> DIGIT THREE
+ '4' # 0x0034 -> DIGIT FOUR
+ '5' # 0x0035 -> DIGIT FIVE
+ '6' # 0x0036 -> DIGIT SIX
+ '7' # 0x0037 -> DIGIT SEVEN
+ '8' # 0x0038 -> DIGIT EIGHT
+ '9' # 0x0039 -> DIGIT NINE
+ ':' # 0x003a -> COLON
+ ';' # 0x003b -> SEMICOLON
+ '<' # 0x003c -> LESS-THAN SIGN
+ '=' # 0x003d -> EQUALS SIGN
+ '>' # 0x003e -> GREATER-THAN SIGN
+ '?' # 0x003f -> QUESTION MARK
+ '@' # 0x0040 -> COMMERCIAL AT
+ 'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ 'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ 'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ 'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ 'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ 'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ 'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ 'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ 'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ 'J' # 0x004a -> LATIN CAPITAL LETTER J
+ 'K' # 0x004b -> LATIN CAPITAL LETTER K
+ 'L' # 0x004c -> LATIN CAPITAL LETTER L
+ 'M' # 0x004d -> LATIN CAPITAL LETTER M
+ 'N' # 0x004e -> LATIN CAPITAL LETTER N
+ 'O' # 0x004f -> LATIN CAPITAL LETTER O
+ 'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ 'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ 'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ 'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ 'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ 'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ 'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ '[' # 0x005b -> LEFT SQUARE BRACKET
+ '\\' # 0x005c -> REVERSE SOLIDUS
+ ']' # 0x005d -> RIGHT SQUARE BRACKET
+ '^' # 0x005e -> CIRCUMFLEX ACCENT
+ '_' # 0x005f -> LOW LINE
+ '`' # 0x0060 -> GRAVE ACCENT
+ 'a' # 0x0061 -> LATIN SMALL LETTER A
+ 'b' # 0x0062 -> LATIN SMALL LETTER B
+ 'c' # 0x0063 -> LATIN SMALL LETTER C
+ 'd' # 0x0064 -> LATIN SMALL LETTER D
+ 'e' # 0x0065 -> LATIN SMALL LETTER E
+ 'f' # 0x0066 -> LATIN SMALL LETTER F
+ 'g' # 0x0067 -> LATIN SMALL LETTER G
+ 'h' # 0x0068 -> LATIN SMALL LETTER H
+ 'i' # 0x0069 -> LATIN SMALL LETTER I
+ 'j' # 0x006a -> LATIN SMALL LETTER J
+ 'k' # 0x006b -> LATIN SMALL LETTER K
+ 'l' # 0x006c -> LATIN SMALL LETTER L
+ 'm' # 0x006d -> LATIN SMALL LETTER M
+ 'n' # 0x006e -> LATIN SMALL LETTER N
+ 'o' # 0x006f -> LATIN SMALL LETTER O
+ 'p' # 0x0070 -> LATIN SMALL LETTER P
+ 'q' # 0x0071 -> LATIN SMALL LETTER Q
+ 'r' # 0x0072 -> LATIN SMALL LETTER R
+ 's' # 0x0073 -> LATIN SMALL LETTER S
+ 't' # 0x0074 -> LATIN SMALL LETTER T
+ 'u' # 0x0075 -> LATIN SMALL LETTER U
+ 'v' # 0x0076 -> LATIN SMALL LETTER V
+ 'w' # 0x0077 -> LATIN SMALL LETTER W
+ 'x' # 0x0078 -> LATIN SMALL LETTER X
+ 'y' # 0x0079 -> LATIN SMALL LETTER Y
+ 'z' # 0x007a -> LATIN SMALL LETTER Z
+ '{' # 0x007b -> LEFT CURLY BRACKET
+ '|' # 0x007c -> VERTICAL LINE
+ '}' # 0x007d -> RIGHT CURLY BRACKET
+ '~' # 0x007e -> TILDE
+ '\x7f' # 0x007f -> DELETE
+ '\u0410' # 0x0080 -> CYRILLIC CAPITAL LETTER A
+ '\u0411' # 0x0081 -> CYRILLIC CAPITAL LETTER BE
+ '\u0412' # 0x0082 -> CYRILLIC CAPITAL LETTER VE
+ '\u0413' # 0x0083 -> CYRILLIC CAPITAL LETTER GHE
+ '\u0414' # 0x0084 -> CYRILLIC CAPITAL LETTER DE
+ '\u0415' # 0x0085 -> CYRILLIC CAPITAL LETTER IE
+ '\u0416' # 0x0086 -> CYRILLIC CAPITAL LETTER ZHE
+ '\u0417' # 0x0087 -> CYRILLIC CAPITAL LETTER ZE
+ '\u0418' # 0x0088 -> CYRILLIC CAPITAL LETTER I
+ '\u0419' # 0x0089 -> CYRILLIC CAPITAL LETTER SHORT I
+ '\u041a' # 0x008a -> CYRILLIC CAPITAL LETTER KA
+ '\u041b' # 0x008b -> CYRILLIC CAPITAL LETTER EL
+ '\u041c' # 0x008c -> CYRILLIC CAPITAL LETTER EM
+ '\u041d' # 0x008d -> CYRILLIC CAPITAL LETTER EN
+ '\u041e' # 0x008e -> CYRILLIC CAPITAL LETTER O
+ '\u041f' # 0x008f -> CYRILLIC CAPITAL LETTER PE
+ '\u0420' # 0x0090 -> CYRILLIC CAPITAL LETTER ER
+ '\u0421' # 0x0091 -> CYRILLIC CAPITAL LETTER ES
+ '\u0422' # 0x0092 -> CYRILLIC CAPITAL LETTER TE
+ '\u0423' # 0x0093 -> CYRILLIC CAPITAL LETTER U
+ '\u0424' # 0x0094 -> CYRILLIC CAPITAL LETTER EF
+ '\u0425' # 0x0095 -> CYRILLIC CAPITAL LETTER HA
+ '\u0426' # 0x0096 -> CYRILLIC CAPITAL LETTER TSE
+ '\u0427' # 0x0097 -> CYRILLIC CAPITAL LETTER CHE
+ '\u0428' # 0x0098 -> CYRILLIC CAPITAL LETTER SHA
+ '\u0429' # 0x0099 -> CYRILLIC CAPITAL LETTER SHCHA
+ '\u042a' # 0x009a -> CYRILLIC CAPITAL LETTER HARD SIGN
+ '\u042b' # 0x009b -> CYRILLIC CAPITAL LETTER YERU
+ '\u042c' # 0x009c -> CYRILLIC CAPITAL LETTER SOFT SIGN
+ '\u042d' # 0x009d -> CYRILLIC CAPITAL LETTER E
+ '\u042e' # 0x009e -> CYRILLIC CAPITAL LETTER YU
+ '\u042f' # 0x009f -> CYRILLIC CAPITAL LETTER YA
+ '\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A
+ '\u0431' # 0x00a1 -> CYRILLIC SMALL LETTER BE
+ '\u0432' # 0x00a2 -> CYRILLIC SMALL LETTER VE
+ '\u0433' # 0x00a3 -> CYRILLIC SMALL LETTER GHE
+ '\u0434' # 0x00a4 -> CYRILLIC SMALL LETTER DE
+ '\u0435' # 0x00a5 -> CYRILLIC SMALL LETTER IE
+ '\u0436' # 0x00a6 -> CYRILLIC SMALL LETTER ZHE
+ '\u0437' # 0x00a7 -> CYRILLIC SMALL LETTER ZE
+ '\u0438' # 0x00a8 -> CYRILLIC SMALL LETTER I
+ '\u0439' # 0x00a9 -> CYRILLIC SMALL LETTER SHORT I
+ '\u043a' # 0x00aa -> CYRILLIC SMALL LETTER KA
+ '\u043b' # 0x00ab -> CYRILLIC SMALL LETTER EL
+ '\u043c' # 0x00ac -> CYRILLIC SMALL LETTER EM
+ '\u043d' # 0x00ad -> CYRILLIC SMALL LETTER EN
+ '\u043e' # 0x00ae -> CYRILLIC SMALL LETTER O
+ '\u043f' # 0x00af -> CYRILLIC SMALL LETTER PE
+ '\u2591' # 0x00b0 -> LIGHT SHADE
+ '\u2592' # 0x00b1 -> MEDIUM SHADE
+ '\u2593' # 0x00b2 -> DARK SHADE
+ '\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ '\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ '\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ '\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ '\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ '\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ '\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ '\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ '\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ '\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ '\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ '\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ '\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ '\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ '\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ '\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ '\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ '\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ '\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ '\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ '\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ '\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ '\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ '\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ '\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ '\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ '\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ '\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ '\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ '\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ '\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ '\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ '\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ '\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ '\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ '\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ '\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ '\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ '\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ '\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ '\u2588' # 0x00db -> FULL BLOCK
+ '\u2584' # 0x00dc -> LOWER HALF BLOCK
+ '\u258c' # 0x00dd -> LEFT HALF BLOCK
+ '\u2590' # 0x00de -> RIGHT HALF BLOCK
+ '\u2580' # 0x00df -> UPPER HALF BLOCK
+ '\u0440' # 0x00e0 -> CYRILLIC SMALL LETTER ER
+ '\u0441' # 0x00e1 -> CYRILLIC SMALL LETTER ES
+ '\u0442' # 0x00e2 -> CYRILLIC SMALL LETTER TE
+ '\u0443' # 0x00e3 -> CYRILLIC SMALL LETTER U
+ '\u0444' # 0x00e4 -> CYRILLIC SMALL LETTER EF
+ '\u0445' # 0x00e5 -> CYRILLIC SMALL LETTER HA
+ '\u0446' # 0x00e6 -> CYRILLIC SMALL LETTER TSE
+ '\u0447' # 0x00e7 -> CYRILLIC SMALL LETTER CHE
+ '\u0448' # 0x00e8 -> CYRILLIC SMALL LETTER SHA
+ '\u0449' # 0x00e9 -> CYRILLIC SMALL LETTER SHCHA
+ '\u044a' # 0x00ea -> CYRILLIC SMALL LETTER HARD SIGN
+ '\u044b' # 0x00eb -> CYRILLIC SMALL LETTER YERU
+ '\u044c' # 0x00ec -> CYRILLIC SMALL LETTER SOFT SIGN
+ '\u044d' # 0x00ed -> CYRILLIC SMALL LETTER E
+ '\u044e' # 0x00ee -> CYRILLIC SMALL LETTER YU
+ '\u044f' # 0x00ef -> CYRILLIC SMALL LETTER YA
+ '\u0401' # 0x00f0 -> CYRILLIC CAPITAL LETTER IO
+ '\u0451' # 0x00f1 -> CYRILLIC SMALL LETTER IO
+ '\u0490' # 0x00f2 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+ '\u0491' # 0x00f3 -> CYRILLIC SMALL LETTER GHE WITH UPTURN
+ '\u0404' # 0x00f4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ '\u0454' # 0x00f5 -> CYRILLIC SMALL LETTER UKRAINIAN IE
+ '\u0406' # 0x00f6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+ '\u0456' # 0x00f7 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+ '\u0407' # 0x00f8 -> CYRILLIC CAPITAL LETTER YI
+ '\u0457' # 0x00f9 -> CYRILLIC SMALL LETTER YI
+ '\xb7' # 0x00fa -> MIDDLE DOT
+ '\u221a' # 0x00fb -> SQUARE ROOT
+ '\u2116' # 0x00fc -> NUMERO SIGN
+ '\xa4' # 0x00fd -> CURRENCY SIGN
+ '\u25a0' # 0x00fe -> BLACK SQUARE
+ '\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a4: 0x00fd, # CURRENCY SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x0401: 0x00f0, # CYRILLIC CAPITAL LETTER IO
+ 0x0404: 0x00f4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ 0x0406: 0x00f6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+ 0x0407: 0x00f8, # CYRILLIC CAPITAL LETTER YI
+ 0x0410: 0x0080, # CYRILLIC CAPITAL LETTER A
+ 0x0411: 0x0081, # CYRILLIC CAPITAL LETTER BE
+ 0x0412: 0x0082, # CYRILLIC CAPITAL LETTER VE
+ 0x0413: 0x0083, # CYRILLIC CAPITAL LETTER GHE
+ 0x0414: 0x0084, # CYRILLIC CAPITAL LETTER DE
+ 0x0415: 0x0085, # CYRILLIC CAPITAL LETTER IE
+ 0x0416: 0x0086, # CYRILLIC CAPITAL LETTER ZHE
+ 0x0417: 0x0087, # CYRILLIC CAPITAL LETTER ZE
+ 0x0418: 0x0088, # CYRILLIC CAPITAL LETTER I
+ 0x0419: 0x0089, # CYRILLIC CAPITAL LETTER SHORT I
+ 0x041a: 0x008a, # CYRILLIC CAPITAL LETTER KA
+ 0x041b: 0x008b, # CYRILLIC CAPITAL LETTER EL
+ 0x041c: 0x008c, # CYRILLIC CAPITAL LETTER EM
+ 0x041d: 0x008d, # CYRILLIC CAPITAL LETTER EN
+ 0x041e: 0x008e, # CYRILLIC CAPITAL LETTER O
+ 0x041f: 0x008f, # CYRILLIC CAPITAL LETTER PE
+ 0x0420: 0x0090, # CYRILLIC CAPITAL LETTER ER
+ 0x0421: 0x0091, # CYRILLIC CAPITAL LETTER ES
+ 0x0422: 0x0092, # CYRILLIC CAPITAL LETTER TE
+ 0x0423: 0x0093, # CYRILLIC CAPITAL LETTER U
+ 0x0424: 0x0094, # CYRILLIC CAPITAL LETTER EF
+ 0x0425: 0x0095, # CYRILLIC CAPITAL LETTER HA
+ 0x0426: 0x0096, # CYRILLIC CAPITAL LETTER TSE
+ 0x0427: 0x0097, # CYRILLIC CAPITAL LETTER CHE
+ 0x0428: 0x0098, # CYRILLIC CAPITAL LETTER SHA
+ 0x0429: 0x0099, # CYRILLIC CAPITAL LETTER SHCHA
+ 0x042a: 0x009a, # CYRILLIC CAPITAL LETTER HARD SIGN
+ 0x042b: 0x009b, # CYRILLIC CAPITAL LETTER YERU
+ 0x042c: 0x009c, # CYRILLIC CAPITAL LETTER SOFT SIGN
+ 0x042d: 0x009d, # CYRILLIC CAPITAL LETTER E
+ 0x042e: 0x009e, # CYRILLIC CAPITAL LETTER YU
+ 0x042f: 0x009f, # CYRILLIC CAPITAL LETTER YA
+ 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A
+ 0x0431: 0x00a1, # CYRILLIC SMALL LETTER BE
+ 0x0432: 0x00a2, # CYRILLIC SMALL LETTER VE
+ 0x0433: 0x00a3, # CYRILLIC SMALL LETTER GHE
+ 0x0434: 0x00a4, # CYRILLIC SMALL LETTER DE
+ 0x0435: 0x00a5, # CYRILLIC SMALL LETTER IE
+ 0x0436: 0x00a6, # CYRILLIC SMALL LETTER ZHE
+ 0x0437: 0x00a7, # CYRILLIC SMALL LETTER ZE
+ 0x0438: 0x00a8, # CYRILLIC SMALL LETTER I
+ 0x0439: 0x00a9, # CYRILLIC SMALL LETTER SHORT I
+ 0x043a: 0x00aa, # CYRILLIC SMALL LETTER KA
+ 0x043b: 0x00ab, # CYRILLIC SMALL LETTER EL
+ 0x043c: 0x00ac, # CYRILLIC SMALL LETTER EM
+ 0x043d: 0x00ad, # CYRILLIC SMALL LETTER EN
+ 0x043e: 0x00ae, # CYRILLIC SMALL LETTER O
+ 0x043f: 0x00af, # CYRILLIC SMALL LETTER PE
+ 0x0440: 0x00e0, # CYRILLIC SMALL LETTER ER
+ 0x0441: 0x00e1, # CYRILLIC SMALL LETTER ES
+ 0x0442: 0x00e2, # CYRILLIC SMALL LETTER TE
+ 0x0443: 0x00e3, # CYRILLIC SMALL LETTER U
+ 0x0444: 0x00e4, # CYRILLIC SMALL LETTER EF
+ 0x0445: 0x00e5, # CYRILLIC SMALL LETTER HA
+ 0x0446: 0x00e6, # CYRILLIC SMALL LETTER TSE
+ 0x0447: 0x00e7, # CYRILLIC SMALL LETTER CHE
+ 0x0448: 0x00e8, # CYRILLIC SMALL LETTER SHA
+ 0x0449: 0x00e9, # CYRILLIC SMALL LETTER SHCHA
+ 0x044a: 0x00ea, # CYRILLIC SMALL LETTER HARD SIGN
+ 0x044b: 0x00eb, # CYRILLIC SMALL LETTER YERU
+ 0x044c: 0x00ec, # CYRILLIC SMALL LETTER SOFT SIGN
+ 0x044d: 0x00ed, # CYRILLIC SMALL LETTER E
+ 0x044e: 0x00ee, # CYRILLIC SMALL LETTER YU
+ 0x044f: 0x00ef, # CYRILLIC SMALL LETTER YA
+ 0x0451: 0x00f1, # CYRILLIC SMALL LETTER IO
+ 0x0454: 0x00f5, # CYRILLIC SMALL LETTER UKRAINIAN IE
+ 0x0456: 0x00f7, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+ 0x0457: 0x00f9, # CYRILLIC SMALL LETTER YI
+ 0x0490: 0x00f2, # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+ 0x0491: 0x00f3, # CYRILLIC SMALL LETTER GHE WITH UPTURN
+ 0x2116: 0x00fc, # NUMERO SIGN
+ 0x221a: 0x00fb, # SQUARE ROOT
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x258c: 0x00dd, # LEFT HALF BLOCK
+ 0x2590: 0x00de, # RIGHT HALF BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/dist/lib/encodings/cp1140.py b/dist/lib/encodings/cp1140.py
new file mode 100644
index 0000000..0a919d8
--- /dev/null
+++ b/dist/lib/encodings/cp1140.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1140 generated from 'python-mappings/CP1140.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp1140',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x9c' # 0x04 -> CONTROL
+ '\t' # 0x05 -> HORIZONTAL TABULATION
+ '\x86' # 0x06 -> CONTROL
+ '\x7f' # 0x07 -> DELETE
+ '\x97' # 0x08 -> CONTROL
+ '\x8d' # 0x09 -> CONTROL
+ '\x8e' # 0x0A -> CONTROL
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x9d' # 0x14 -> CONTROL
+ '\x85' # 0x15 -> CONTROL
+ '\x08' # 0x16 -> BACKSPACE
+ '\x87' # 0x17 -> CONTROL
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x92' # 0x1A -> CONTROL
+ '\x8f' # 0x1B -> CONTROL
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ '\x80' # 0x20 -> CONTROL
+ '\x81' # 0x21 -> CONTROL
+ '\x82' # 0x22 -> CONTROL
+ '\x83' # 0x23 -> CONTROL
+ '\x84' # 0x24 -> CONTROL
+ '\n' # 0x25 -> LINE FEED
+ '\x17' # 0x26 -> END OF TRANSMISSION BLOCK
+ '\x1b' # 0x27 -> ESCAPE
+ '\x88' # 0x28 -> CONTROL
+ '\x89' # 0x29 -> CONTROL
+ '\x8a' # 0x2A -> CONTROL
+ '\x8b' # 0x2B -> CONTROL
+ '\x8c' # 0x2C -> CONTROL
+ '\x05' # 0x2D -> ENQUIRY
+ '\x06' # 0x2E -> ACKNOWLEDGE
+ '\x07' # 0x2F -> BELL
+ '\x90' # 0x30 -> CONTROL
+ '\x91' # 0x31 -> CONTROL
+ '\x16' # 0x32 -> SYNCHRONOUS IDLE
+ '\x93' # 0x33 -> CONTROL
+ '\x94' # 0x34 -> CONTROL
+ '\x95' # 0x35 -> CONTROL
+ '\x96' # 0x36 -> CONTROL
+ '\x04' # 0x37 -> END OF TRANSMISSION
+ '\x98' # 0x38 -> CONTROL
+ '\x99' # 0x39 -> CONTROL
+ '\x9a' # 0x3A -> CONTROL
+ '\x9b' # 0x3B -> CONTROL
+ '\x14' # 0x3C -> DEVICE CONTROL FOUR
+ '\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE
+ '\x9e' # 0x3E -> CONTROL
+ '\x1a' # 0x3F -> SUBSTITUTE
+ ' ' # 0x40 -> SPACE
+ '\xa0' # 0x41 -> NO-BREAK SPACE
+ '\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE
+ '\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE
+ '\xa2' # 0x4A -> CENT SIGN
+ '.' # 0x4B -> FULL STOP
+ '<' # 0x4C -> LESS-THAN SIGN
+ '(' # 0x4D -> LEFT PARENTHESIS
+ '+' # 0x4E -> PLUS SIGN
+ '|' # 0x4F -> VERTICAL LINE
+ '&' # 0x50 -> AMPERSAND
+ '\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE
+ '\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE
+ '\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE
+ '\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
+ '!' # 0x5A -> EXCLAMATION MARK
+ '$' # 0x5B -> DOLLAR SIGN
+ '*' # 0x5C -> ASTERISK
+ ')' # 0x5D -> RIGHT PARENTHESIS
+ ';' # 0x5E -> SEMICOLON
+ '\xac' # 0x5F -> NOT SIGN
+ '-' # 0x60 -> HYPHEN-MINUS
+ '/' # 0x61 -> SOLIDUS
+ '\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ '\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
+ '\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE
+ '\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE
+ '\xa6' # 0x6A -> BROKEN BAR
+ ',' # 0x6B -> COMMA
+ '%' # 0x6C -> PERCENT SIGN
+ '_' # 0x6D -> LOW LINE
+ '>' # 0x6E -> GREATER-THAN SIGN
+ '?' # 0x6F -> QUESTION MARK
+ '\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE
+ '\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ '\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ '\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
+ '\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ '\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ '\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
+ '`' # 0x79 -> GRAVE ACCENT
+ ':' # 0x7A -> COLON
+ '#' # 0x7B -> NUMBER SIGN
+ '@' # 0x7C -> COMMERCIAL AT
+ "'" # 0x7D -> APOSTROPHE
+ '=' # 0x7E -> EQUALS SIGN
+ '"' # 0x7F -> QUOTATION MARK
+ '\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE
+ 'a' # 0x81 -> LATIN SMALL LETTER A
+ 'b' # 0x82 -> LATIN SMALL LETTER B
+ 'c' # 0x83 -> LATIN SMALL LETTER C
+ 'd' # 0x84 -> LATIN SMALL LETTER D
+ 'e' # 0x85 -> LATIN SMALL LETTER E
+ 'f' # 0x86 -> LATIN SMALL LETTER F
+ 'g' # 0x87 -> LATIN SMALL LETTER G
+ 'h' # 0x88 -> LATIN SMALL LETTER H
+ 'i' # 0x89 -> LATIN SMALL LETTER I
+ '\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC)
+ '\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE
+ '\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC)
+ '\xb1' # 0x8F -> PLUS-MINUS SIGN
+ '\xb0' # 0x90 -> DEGREE SIGN
+ 'j' # 0x91 -> LATIN SMALL LETTER J
+ 'k' # 0x92 -> LATIN SMALL LETTER K
+ 'l' # 0x93 -> LATIN SMALL LETTER L
+ 'm' # 0x94 -> LATIN SMALL LETTER M
+ 'n' # 0x95 -> LATIN SMALL LETTER N
+ 'o' # 0x96 -> LATIN SMALL LETTER O
+ 'p' # 0x97 -> LATIN SMALL LETTER P
+ 'q' # 0x98 -> LATIN SMALL LETTER Q
+ 'r' # 0x99 -> LATIN SMALL LETTER R
+ '\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR
+ '\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR
+ '\xe6' # 0x9C -> LATIN SMALL LIGATURE AE
+ '\xb8' # 0x9D -> CEDILLA
+ '\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE
+ '\u20ac' # 0x9F -> EURO SIGN
+ '\xb5' # 0xA0 -> MICRO SIGN
+ '~' # 0xA1 -> TILDE
+ 's' # 0xA2 -> LATIN SMALL LETTER S
+ 't' # 0xA3 -> LATIN SMALL LETTER T
+ 'u' # 0xA4 -> LATIN SMALL LETTER U
+ 'v' # 0xA5 -> LATIN SMALL LETTER V
+ 'w' # 0xA6 -> LATIN SMALL LETTER W
+ 'x' # 0xA7 -> LATIN SMALL LETTER X
+ 'y' # 0xA8 -> LATIN SMALL LETTER Y
+ 'z' # 0xA9 -> LATIN SMALL LETTER Z
+ '\xa1' # 0xAA -> INVERTED EXCLAMATION MARK
+ '\xbf' # 0xAB -> INVERTED QUESTION MARK
+ '\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC)
+ '\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE
+ '\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC)
+ '\xae' # 0xAF -> REGISTERED SIGN
+ '^' # 0xB0 -> CIRCUMFLEX ACCENT
+ '\xa3' # 0xB1 -> POUND SIGN
+ '\xa5' # 0xB2 -> YEN SIGN
+ '\xb7' # 0xB3 -> MIDDLE DOT
+ '\xa9' # 0xB4 -> COPYRIGHT SIGN
+ '\xa7' # 0xB5 -> SECTION SIGN
+ '\xb6' # 0xB6 -> PILCROW SIGN
+ '\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER
+ '\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF
+ '\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS
+ '[' # 0xBA -> LEFT SQUARE BRACKET
+ ']' # 0xBB -> RIGHT SQUARE BRACKET
+ '\xaf' # 0xBC -> MACRON
+ '\xa8' # 0xBD -> DIAERESIS
+ '\xb4' # 0xBE -> ACUTE ACCENT
+ '\xd7' # 0xBF -> MULTIPLICATION SIGN
+ '{' # 0xC0 -> LEFT CURLY BRACKET
+ 'A' # 0xC1 -> LATIN CAPITAL LETTER A
+ 'B' # 0xC2 -> LATIN CAPITAL LETTER B
+ 'C' # 0xC3 -> LATIN CAPITAL LETTER C
+ 'D' # 0xC4 -> LATIN CAPITAL LETTER D
+ 'E' # 0xC5 -> LATIN CAPITAL LETTER E
+ 'F' # 0xC6 -> LATIN CAPITAL LETTER F
+ 'G' # 0xC7 -> LATIN CAPITAL LETTER G
+ 'H' # 0xC8 -> LATIN CAPITAL LETTER H
+ 'I' # 0xC9 -> LATIN CAPITAL LETTER I
+ '\xad' # 0xCA -> SOFT HYPHEN
+ '\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE
+ '\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE
+ '\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE
+ '}' # 0xD0 -> RIGHT CURLY BRACKET
+ 'J' # 0xD1 -> LATIN CAPITAL LETTER J
+ 'K' # 0xD2 -> LATIN CAPITAL LETTER K
+ 'L' # 0xD3 -> LATIN CAPITAL LETTER L
+ 'M' # 0xD4 -> LATIN CAPITAL LETTER M
+ 'N' # 0xD5 -> LATIN CAPITAL LETTER N
+ 'O' # 0xD6 -> LATIN CAPITAL LETTER O
+ 'P' # 0xD7 -> LATIN CAPITAL LETTER P
+ 'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
+ 'R' # 0xD9 -> LATIN CAPITAL LETTER R
+ '\xb9' # 0xDA -> SUPERSCRIPT ONE
+ '\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE
+ '\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE
+ '\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
+ '\\' # 0xE0 -> REVERSE SOLIDUS
+ '\xf7' # 0xE1 -> DIVISION SIGN
+ 'S' # 0xE2 -> LATIN CAPITAL LETTER S
+ 'T' # 0xE3 -> LATIN CAPITAL LETTER T
+ 'U' # 0xE4 -> LATIN CAPITAL LETTER U
+ 'V' # 0xE5 -> LATIN CAPITAL LETTER V
+ 'W' # 0xE6 -> LATIN CAPITAL LETTER W
+ 'X' # 0xE7 -> LATIN CAPITAL LETTER X
+ 'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
+ '\xb2' # 0xEA -> SUPERSCRIPT TWO
+ '\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE
+ '\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE
+ '0' # 0xF0 -> DIGIT ZERO
+ '1' # 0xF1 -> DIGIT ONE
+ '2' # 0xF2 -> DIGIT TWO
+ '3' # 0xF3 -> DIGIT THREE
+ '4' # 0xF4 -> DIGIT FOUR
+ '5' # 0xF5 -> DIGIT FIVE
+ '6' # 0xF6 -> DIGIT SIX
+ '7' # 0xF7 -> DIGIT SEVEN
+ '8' # 0xF8 -> DIGIT EIGHT
+ '9' # 0xF9 -> DIGIT NINE
+ '\xb3' # 0xFA -> SUPERSCRIPT THREE
+ '\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ '\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
+ '\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\x9f' # 0xFF -> CONTROL
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp1250.py b/dist/lib/encodings/cp1250.py
new file mode 100644
index 0000000..c2c83aa
--- /dev/null
+++ b/dist/lib/encodings/cp1250.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1250 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp1250',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x04' # 0x04 -> END OF TRANSMISSION
+ '\x05' # 0x05 -> ENQUIRY
+ '\x06' # 0x06 -> ACKNOWLEDGE
+ '\x07' # 0x07 -> BELL
+ '\x08' # 0x08 -> BACKSPACE
+ '\t' # 0x09 -> HORIZONTAL TABULATION
+ '\n' # 0x0A -> LINE FEED
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x14' # 0x14 -> DEVICE CONTROL FOUR
+ '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x16 -> SYNCHRONOUS IDLE
+ '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x1a' # 0x1A -> SUBSTITUTE
+ '\x1b' # 0x1B -> ESCAPE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> DELETE
+ '\u20ac' # 0x80 -> EURO SIGN
+ '\ufffe' # 0x81 -> UNDEFINED
+ '\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
+ '\ufffe' # 0x83 -> UNDEFINED
+ '\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ '\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ '\u2020' # 0x86 -> DAGGER
+ '\u2021' # 0x87 -> DOUBLE DAGGER
+ '\ufffe' # 0x88 -> UNDEFINED
+ '\u2030' # 0x89 -> PER MILLE SIGN
+ '\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON
+ '\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ '\u015a' # 0x8C -> LATIN CAPITAL LETTER S WITH ACUTE
+ '\u0164' # 0x8D -> LATIN CAPITAL LETTER T WITH CARON
+ '\u017d' # 0x8E -> LATIN CAPITAL LETTER Z WITH CARON
+ '\u0179' # 0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE
+ '\ufffe' # 0x90 -> UNDEFINED
+ '\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ '\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ '\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ '\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ '\u2022' # 0x95 -> BULLET
+ '\u2013' # 0x96 -> EN DASH
+ '\u2014' # 0x97 -> EM DASH
+ '\ufffe' # 0x98 -> UNDEFINED
+ '\u2122' # 0x99 -> TRADE MARK SIGN
+ '\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON
+ '\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ '\u015b' # 0x9C -> LATIN SMALL LETTER S WITH ACUTE
+ '\u0165' # 0x9D -> LATIN SMALL LETTER T WITH CARON
+ '\u017e' # 0x9E -> LATIN SMALL LETTER Z WITH CARON
+ '\u017a' # 0x9F -> LATIN SMALL LETTER Z WITH ACUTE
+ '\xa0' # 0xA0 -> NO-BREAK SPACE
+ '\u02c7' # 0xA1 -> CARON
+ '\u02d8' # 0xA2 -> BREVE
+ '\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE
+ '\xa4' # 0xA4 -> CURRENCY SIGN
+ '\u0104' # 0xA5 -> LATIN CAPITAL LETTER A WITH OGONEK
+ '\xa6' # 0xA6 -> BROKEN BAR
+ '\xa7' # 0xA7 -> SECTION SIGN
+ '\xa8' # 0xA8 -> DIAERESIS
+ '\xa9' # 0xA9 -> COPYRIGHT SIGN
+ '\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA
+ '\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xac' # 0xAC -> NOT SIGN
+ '\xad' # 0xAD -> SOFT HYPHEN
+ '\xae' # 0xAE -> REGISTERED SIGN
+ '\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
+ '\xb0' # 0xB0 -> DEGREE SIGN
+ '\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ '\u02db' # 0xB2 -> OGONEK
+ '\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE
+ '\xb4' # 0xB4 -> ACUTE ACCENT
+ '\xb5' # 0xB5 -> MICRO SIGN
+ '\xb6' # 0xB6 -> PILCROW SIGN
+ '\xb7' # 0xB7 -> MIDDLE DOT
+ '\xb8' # 0xB8 -> CEDILLA
+ '\u0105' # 0xB9 -> LATIN SMALL LETTER A WITH OGONEK
+ '\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA
+ '\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u013d' # 0xBC -> LATIN CAPITAL LETTER L WITH CARON
+ '\u02dd' # 0xBD -> DOUBLE ACUTE ACCENT
+ '\u013e' # 0xBE -> LATIN SMALL LETTER L WITH CARON
+ '\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE
+ '\u0154' # 0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE
+ '\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ '\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE
+ '\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\u0139' # 0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE
+ '\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE
+ '\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON
+ '\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK
+ '\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ '\u011a' # 0xCC -> LATIN CAPITAL LETTER E WITH CARON
+ '\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ '\u010e' # 0xCF -> LATIN CAPITAL LETTER D WITH CARON
+ '\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
+ '\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE
+ '\u0147' # 0xD2 -> LATIN CAPITAL LETTER N WITH CARON
+ '\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+ '\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xd7' # 0xD7 -> MULTIPLICATION SIGN
+ '\u0158' # 0xD8 -> LATIN CAPITAL LETTER R WITH CARON
+ '\u016e' # 0xD9 -> LATIN CAPITAL LETTER U WITH RING ABOVE
+ '\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\u0170' # 0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+ '\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
+ '\u0162' # 0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA
+ '\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
+ '\u0155' # 0xE0 -> LATIN SMALL LETTER R WITH ACUTE
+ '\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE
+ '\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\u013a' # 0xE5 -> LATIN SMALL LETTER L WITH ACUTE
+ '\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE
+ '\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON
+ '\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+ '\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK
+ '\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\u011b' # 0xEC -> LATIN SMALL LETTER E WITH CARON
+ '\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
+ '\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\u010f' # 0xEF -> LATIN SMALL LETTER D WITH CARON
+ '\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE
+ '\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE
+ '\u0148' # 0xF2 -> LATIN SMALL LETTER N WITH CARON
+ '\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+ '\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE
+ '\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\xf7' # 0xF7 -> DIVISION SIGN
+ '\u0159' # 0xF8 -> LATIN SMALL LETTER R WITH CARON
+ '\u016f' # 0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE
+ '\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
+ '\u0171' # 0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE
+ '\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE
+ '\u0163' # 0xFE -> LATIN SMALL LETTER T WITH CEDILLA
+ '\u02d9' # 0xFF -> DOT ABOVE
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp1251.py b/dist/lib/encodings/cp1251.py
new file mode 100644
index 0000000..22bc660
--- /dev/null
+++ b/dist/lib/encodings/cp1251.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1251 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp1251',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x04' # 0x04 -> END OF TRANSMISSION
+ '\x05' # 0x05 -> ENQUIRY
+ '\x06' # 0x06 -> ACKNOWLEDGE
+ '\x07' # 0x07 -> BELL
+ '\x08' # 0x08 -> BACKSPACE
+ '\t' # 0x09 -> HORIZONTAL TABULATION
+ '\n' # 0x0A -> LINE FEED
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x14' # 0x14 -> DEVICE CONTROL FOUR
+ '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x16 -> SYNCHRONOUS IDLE
+ '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x1a' # 0x1A -> SUBSTITUTE
+ '\x1b' # 0x1B -> ESCAPE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> DELETE
+ '\u0402' # 0x80 -> CYRILLIC CAPITAL LETTER DJE
+ '\u0403' # 0x81 -> CYRILLIC CAPITAL LETTER GJE
+ '\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
+ '\u0453' # 0x83 -> CYRILLIC SMALL LETTER GJE
+ '\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ '\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ '\u2020' # 0x86 -> DAGGER
+ '\u2021' # 0x87 -> DOUBLE DAGGER
+ '\u20ac' # 0x88 -> EURO SIGN
+ '\u2030' # 0x89 -> PER MILLE SIGN
+ '\u0409' # 0x8A -> CYRILLIC CAPITAL LETTER LJE
+ '\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ '\u040a' # 0x8C -> CYRILLIC CAPITAL LETTER NJE
+ '\u040c' # 0x8D -> CYRILLIC CAPITAL LETTER KJE
+ '\u040b' # 0x8E -> CYRILLIC CAPITAL LETTER TSHE
+ '\u040f' # 0x8F -> CYRILLIC CAPITAL LETTER DZHE
+ '\u0452' # 0x90 -> CYRILLIC SMALL LETTER DJE
+ '\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ '\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ '\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ '\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ '\u2022' # 0x95 -> BULLET
+ '\u2013' # 0x96 -> EN DASH
+ '\u2014' # 0x97 -> EM DASH
+ '\ufffe' # 0x98 -> UNDEFINED
+ '\u2122' # 0x99 -> TRADE MARK SIGN
+ '\u0459' # 0x9A -> CYRILLIC SMALL LETTER LJE
+ '\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ '\u045a' # 0x9C -> CYRILLIC SMALL LETTER NJE
+ '\u045c' # 0x9D -> CYRILLIC SMALL LETTER KJE
+ '\u045b' # 0x9E -> CYRILLIC SMALL LETTER TSHE
+ '\u045f' # 0x9F -> CYRILLIC SMALL LETTER DZHE
+ '\xa0' # 0xA0 -> NO-BREAK SPACE
+ '\u040e' # 0xA1 -> CYRILLIC CAPITAL LETTER SHORT U
+ '\u045e' # 0xA2 -> CYRILLIC SMALL LETTER SHORT U
+ '\u0408' # 0xA3 -> CYRILLIC CAPITAL LETTER JE
+ '\xa4' # 0xA4 -> CURRENCY SIGN
+ '\u0490' # 0xA5 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+ '\xa6' # 0xA6 -> BROKEN BAR
+ '\xa7' # 0xA7 -> SECTION SIGN
+ '\u0401' # 0xA8 -> CYRILLIC CAPITAL LETTER IO
+ '\xa9' # 0xA9 -> COPYRIGHT SIGN
+ '\u0404' # 0xAA -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ '\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xac' # 0xAC -> NOT SIGN
+ '\xad' # 0xAD -> SOFT HYPHEN
+ '\xae' # 0xAE -> REGISTERED SIGN
+ '\u0407' # 0xAF -> CYRILLIC CAPITAL LETTER YI
+ '\xb0' # 0xB0 -> DEGREE SIGN
+ '\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ '\u0406' # 0xB2 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+ '\u0456' # 0xB3 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+ '\u0491' # 0xB4 -> CYRILLIC SMALL LETTER GHE WITH UPTURN
+ '\xb5' # 0xB5 -> MICRO SIGN
+ '\xb6' # 0xB6 -> PILCROW SIGN
+ '\xb7' # 0xB7 -> MIDDLE DOT
+ '\u0451' # 0xB8 -> CYRILLIC SMALL LETTER IO
+ '\u2116' # 0xB9 -> NUMERO SIGN
+ '\u0454' # 0xBA -> CYRILLIC SMALL LETTER UKRAINIAN IE
+ '\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u0458' # 0xBC -> CYRILLIC SMALL LETTER JE
+ '\u0405' # 0xBD -> CYRILLIC CAPITAL LETTER DZE
+ '\u0455' # 0xBE -> CYRILLIC SMALL LETTER DZE
+ '\u0457' # 0xBF -> CYRILLIC SMALL LETTER YI
+ '\u0410' # 0xC0 -> CYRILLIC CAPITAL LETTER A
+ '\u0411' # 0xC1 -> CYRILLIC CAPITAL LETTER BE
+ '\u0412' # 0xC2 -> CYRILLIC CAPITAL LETTER VE
+ '\u0413' # 0xC3 -> CYRILLIC CAPITAL LETTER GHE
+ '\u0414' # 0xC4 -> CYRILLIC CAPITAL LETTER DE
+ '\u0415' # 0xC5 -> CYRILLIC CAPITAL LETTER IE
+ '\u0416' # 0xC6 -> CYRILLIC CAPITAL LETTER ZHE
+ '\u0417' # 0xC7 -> CYRILLIC CAPITAL LETTER ZE
+ '\u0418' # 0xC8 -> CYRILLIC CAPITAL LETTER I
+ '\u0419' # 0xC9 -> CYRILLIC CAPITAL LETTER SHORT I
+ '\u041a' # 0xCA -> CYRILLIC CAPITAL LETTER KA
+ '\u041b' # 0xCB -> CYRILLIC CAPITAL LETTER EL
+ '\u041c' # 0xCC -> CYRILLIC CAPITAL LETTER EM
+ '\u041d' # 0xCD -> CYRILLIC CAPITAL LETTER EN
+ '\u041e' # 0xCE -> CYRILLIC CAPITAL LETTER O
+ '\u041f' # 0xCF -> CYRILLIC CAPITAL LETTER PE
+ '\u0420' # 0xD0 -> CYRILLIC CAPITAL LETTER ER
+ '\u0421' # 0xD1 -> CYRILLIC CAPITAL LETTER ES
+ '\u0422' # 0xD2 -> CYRILLIC CAPITAL LETTER TE
+ '\u0423' # 0xD3 -> CYRILLIC CAPITAL LETTER U
+ '\u0424' # 0xD4 -> CYRILLIC CAPITAL LETTER EF
+ '\u0425' # 0xD5 -> CYRILLIC CAPITAL LETTER HA
+ '\u0426' # 0xD6 -> CYRILLIC CAPITAL LETTER TSE
+ '\u0427' # 0xD7 -> CYRILLIC CAPITAL LETTER CHE
+ '\u0428' # 0xD8 -> CYRILLIC CAPITAL LETTER SHA
+ '\u0429' # 0xD9 -> CYRILLIC CAPITAL LETTER SHCHA
+ '\u042a' # 0xDA -> CYRILLIC CAPITAL LETTER HARD SIGN
+ '\u042b' # 0xDB -> CYRILLIC CAPITAL LETTER YERU
+ '\u042c' # 0xDC -> CYRILLIC CAPITAL LETTER SOFT SIGN
+ '\u042d' # 0xDD -> CYRILLIC CAPITAL LETTER E
+ '\u042e' # 0xDE -> CYRILLIC CAPITAL LETTER YU
+ '\u042f' # 0xDF -> CYRILLIC CAPITAL LETTER YA
+ '\u0430' # 0xE0 -> CYRILLIC SMALL LETTER A
+ '\u0431' # 0xE1 -> CYRILLIC SMALL LETTER BE
+ '\u0432' # 0xE2 -> CYRILLIC SMALL LETTER VE
+ '\u0433' # 0xE3 -> CYRILLIC SMALL LETTER GHE
+ '\u0434' # 0xE4 -> CYRILLIC SMALL LETTER DE
+ '\u0435' # 0xE5 -> CYRILLIC SMALL LETTER IE
+ '\u0436' # 0xE6 -> CYRILLIC SMALL LETTER ZHE
+ '\u0437' # 0xE7 -> CYRILLIC SMALL LETTER ZE
+ '\u0438' # 0xE8 -> CYRILLIC SMALL LETTER I
+ '\u0439' # 0xE9 -> CYRILLIC SMALL LETTER SHORT I
+ '\u043a' # 0xEA -> CYRILLIC SMALL LETTER KA
+ '\u043b' # 0xEB -> CYRILLIC SMALL LETTER EL
+ '\u043c' # 0xEC -> CYRILLIC SMALL LETTER EM
+ '\u043d' # 0xED -> CYRILLIC SMALL LETTER EN
+ '\u043e' # 0xEE -> CYRILLIC SMALL LETTER O
+ '\u043f' # 0xEF -> CYRILLIC SMALL LETTER PE
+ '\u0440' # 0xF0 -> CYRILLIC SMALL LETTER ER
+ '\u0441' # 0xF1 -> CYRILLIC SMALL LETTER ES
+ '\u0442' # 0xF2 -> CYRILLIC SMALL LETTER TE
+ '\u0443' # 0xF3 -> CYRILLIC SMALL LETTER U
+ '\u0444' # 0xF4 -> CYRILLIC SMALL LETTER EF
+ '\u0445' # 0xF5 -> CYRILLIC SMALL LETTER HA
+ '\u0446' # 0xF6 -> CYRILLIC SMALL LETTER TSE
+ '\u0447' # 0xF7 -> CYRILLIC SMALL LETTER CHE
+ '\u0448' # 0xF8 -> CYRILLIC SMALL LETTER SHA
+ '\u0449' # 0xF9 -> CYRILLIC SMALL LETTER SHCHA
+ '\u044a' # 0xFA -> CYRILLIC SMALL LETTER HARD SIGN
+ '\u044b' # 0xFB -> CYRILLIC SMALL LETTER YERU
+ '\u044c' # 0xFC -> CYRILLIC SMALL LETTER SOFT SIGN
+ '\u044d' # 0xFD -> CYRILLIC SMALL LETTER E
+ '\u044e' # 0xFE -> CYRILLIC SMALL LETTER YU
+ '\u044f' # 0xFF -> CYRILLIC SMALL LETTER YA
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp1252.py b/dist/lib/encodings/cp1252.py
new file mode 100644
index 0000000..c0e8088
--- /dev/null
+++ b/dist/lib/encodings/cp1252.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1252 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp1252',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x04' # 0x04 -> END OF TRANSMISSION
+ '\x05' # 0x05 -> ENQUIRY
+ '\x06' # 0x06 -> ACKNOWLEDGE
+ '\x07' # 0x07 -> BELL
+ '\x08' # 0x08 -> BACKSPACE
+ '\t' # 0x09 -> HORIZONTAL TABULATION
+ '\n' # 0x0A -> LINE FEED
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x14' # 0x14 -> DEVICE CONTROL FOUR
+ '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x16 -> SYNCHRONOUS IDLE
+ '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x1a' # 0x1A -> SUBSTITUTE
+ '\x1b' # 0x1B -> ESCAPE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> DELETE
+ '\u20ac' # 0x80 -> EURO SIGN
+ '\ufffe' # 0x81 -> UNDEFINED
+ '\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
+ '\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK
+ '\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ '\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ '\u2020' # 0x86 -> DAGGER
+ '\u2021' # 0x87 -> DOUBLE DAGGER
+ '\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+ '\u2030' # 0x89 -> PER MILLE SIGN
+ '\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON
+ '\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ '\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE
+ '\ufffe' # 0x8D -> UNDEFINED
+ '\u017d' # 0x8E -> LATIN CAPITAL LETTER Z WITH CARON
+ '\ufffe' # 0x8F -> UNDEFINED
+ '\ufffe' # 0x90 -> UNDEFINED
+ '\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ '\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ '\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ '\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ '\u2022' # 0x95 -> BULLET
+ '\u2013' # 0x96 -> EN DASH
+ '\u2014' # 0x97 -> EM DASH
+ '\u02dc' # 0x98 -> SMALL TILDE
+ '\u2122' # 0x99 -> TRADE MARK SIGN
+ '\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON
+ '\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ '\u0153' # 0x9C -> LATIN SMALL LIGATURE OE
+ '\ufffe' # 0x9D -> UNDEFINED
+ '\u017e' # 0x9E -> LATIN SMALL LETTER Z WITH CARON
+ '\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+ '\xa0' # 0xA0 -> NO-BREAK SPACE
+ '\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK
+ '\xa2' # 0xA2 -> CENT SIGN
+ '\xa3' # 0xA3 -> POUND SIGN
+ '\xa4' # 0xA4 -> CURRENCY SIGN
+ '\xa5' # 0xA5 -> YEN SIGN
+ '\xa6' # 0xA6 -> BROKEN BAR
+ '\xa7' # 0xA7 -> SECTION SIGN
+ '\xa8' # 0xA8 -> DIAERESIS
+ '\xa9' # 0xA9 -> COPYRIGHT SIGN
+ '\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR
+ '\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xac' # 0xAC -> NOT SIGN
+ '\xad' # 0xAD -> SOFT HYPHEN
+ '\xae' # 0xAE -> REGISTERED SIGN
+ '\xaf' # 0xAF -> MACRON
+ '\xb0' # 0xB0 -> DEGREE SIGN
+ '\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ '\xb2' # 0xB2 -> SUPERSCRIPT TWO
+ '\xb3' # 0xB3 -> SUPERSCRIPT THREE
+ '\xb4' # 0xB4 -> ACUTE ACCENT
+ '\xb5' # 0xB5 -> MICRO SIGN
+ '\xb6' # 0xB6 -> PILCROW SIGN
+ '\xb7' # 0xB7 -> MIDDLE DOT
+ '\xb8' # 0xB8 -> CEDILLA
+ '\xb9' # 0xB9 -> SUPERSCRIPT ONE
+ '\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR
+ '\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
+ '\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
+ '\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
+ '\xbf' # 0xBF -> INVERTED QUESTION MARK
+ '\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
+ '\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ '\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
+ '\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE
+ '\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
+ '\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ '\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ '\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
+ '\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ '\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ '\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH
+ '\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
+ '\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
+ '\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
+ '\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xd7' # 0xD7 -> MULTIPLICATION SIGN
+ '\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
+ '\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
+ '\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ '\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
+ '\xde' # 0xDE -> LATIN CAPITAL LETTER THORN
+ '\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
+ '\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE
+ '\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '\xe6' # 0xE6 -> LATIN SMALL LETTER AE
+ '\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+ '\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE
+ '\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
+ '\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\xf0' # 0xF0 -> LATIN SMALL LETTER ETH
+ '\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE
+ '\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE
+ '\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+ '\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE
+ '\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\xf7' # 0xF7 -> DIVISION SIGN
+ '\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE
+ '\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+ '\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
+ '\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE
+ '\xfe' # 0xFE -> LATIN SMALL LETTER THORN
+ '\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp1253.py b/dist/lib/encodings/cp1253.py
new file mode 100644
index 0000000..ec9c097
--- /dev/null
+++ b/dist/lib/encodings/cp1253.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1253 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp1253',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x04' # 0x04 -> END OF TRANSMISSION
+ '\x05' # 0x05 -> ENQUIRY
+ '\x06' # 0x06 -> ACKNOWLEDGE
+ '\x07' # 0x07 -> BELL
+ '\x08' # 0x08 -> BACKSPACE
+ '\t' # 0x09 -> HORIZONTAL TABULATION
+ '\n' # 0x0A -> LINE FEED
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x14' # 0x14 -> DEVICE CONTROL FOUR
+ '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x16 -> SYNCHRONOUS IDLE
+ '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x1a' # 0x1A -> SUBSTITUTE
+ '\x1b' # 0x1B -> ESCAPE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> DELETE
+ '\u20ac' # 0x80 -> EURO SIGN
+ '\ufffe' # 0x81 -> UNDEFINED
+ '\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
+ '\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK
+ '\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ '\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ '\u2020' # 0x86 -> DAGGER
+ '\u2021' # 0x87 -> DOUBLE DAGGER
+ '\ufffe' # 0x88 -> UNDEFINED
+ '\u2030' # 0x89 -> PER MILLE SIGN
+ '\ufffe' # 0x8A -> UNDEFINED
+ '\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ '\ufffe' # 0x8C -> UNDEFINED
+ '\ufffe' # 0x8D -> UNDEFINED
+ '\ufffe' # 0x8E -> UNDEFINED
+ '\ufffe' # 0x8F -> UNDEFINED
+ '\ufffe' # 0x90 -> UNDEFINED
+ '\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ '\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ '\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ '\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ '\u2022' # 0x95 -> BULLET
+ '\u2013' # 0x96 -> EN DASH
+ '\u2014' # 0x97 -> EM DASH
+ '\ufffe' # 0x98 -> UNDEFINED
+ '\u2122' # 0x99 -> TRADE MARK SIGN
+ '\ufffe' # 0x9A -> UNDEFINED
+ '\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ '\ufffe' # 0x9C -> UNDEFINED
+ '\ufffe' # 0x9D -> UNDEFINED
+ '\ufffe' # 0x9E -> UNDEFINED
+ '\ufffe' # 0x9F -> UNDEFINED
+ '\xa0' # 0xA0 -> NO-BREAK SPACE
+ '\u0385' # 0xA1 -> GREEK DIALYTIKA TONOS
+ '\u0386' # 0xA2 -> GREEK CAPITAL LETTER ALPHA WITH TONOS
+ '\xa3' # 0xA3 -> POUND SIGN
+ '\xa4' # 0xA4 -> CURRENCY SIGN
+ '\xa5' # 0xA5 -> YEN SIGN
+ '\xa6' # 0xA6 -> BROKEN BAR
+ '\xa7' # 0xA7 -> SECTION SIGN
+ '\xa8' # 0xA8 -> DIAERESIS
+ '\xa9' # 0xA9 -> COPYRIGHT SIGN
+ '\ufffe' # 0xAA -> UNDEFINED
+ '\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xac' # 0xAC -> NOT SIGN
+ '\xad' # 0xAD -> SOFT HYPHEN
+ '\xae' # 0xAE -> REGISTERED SIGN
+ '\u2015' # 0xAF -> HORIZONTAL BAR
+ '\xb0' # 0xB0 -> DEGREE SIGN
+ '\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ '\xb2' # 0xB2 -> SUPERSCRIPT TWO
+ '\xb3' # 0xB3 -> SUPERSCRIPT THREE
+ '\u0384' # 0xB4 -> GREEK TONOS
+ '\xb5' # 0xB5 -> MICRO SIGN
+ '\xb6' # 0xB6 -> PILCROW SIGN
+ '\xb7' # 0xB7 -> MIDDLE DOT
+ '\u0388' # 0xB8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS
+ '\u0389' # 0xB9 -> GREEK CAPITAL LETTER ETA WITH TONOS
+ '\u038a' # 0xBA -> GREEK CAPITAL LETTER IOTA WITH TONOS
+ '\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u038c' # 0xBC -> GREEK CAPITAL LETTER OMICRON WITH TONOS
+ '\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
+ '\u038e' # 0xBE -> GREEK CAPITAL LETTER UPSILON WITH TONOS
+ '\u038f' # 0xBF -> GREEK CAPITAL LETTER OMEGA WITH TONOS
+ '\u0390' # 0xC0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+ '\u0391' # 0xC1 -> GREEK CAPITAL LETTER ALPHA
+ '\u0392' # 0xC2 -> GREEK CAPITAL LETTER BETA
+ '\u0393' # 0xC3 -> GREEK CAPITAL LETTER GAMMA
+ '\u0394' # 0xC4 -> GREEK CAPITAL LETTER DELTA
+ '\u0395' # 0xC5 -> GREEK CAPITAL LETTER EPSILON
+ '\u0396' # 0xC6 -> GREEK CAPITAL LETTER ZETA
+ '\u0397' # 0xC7 -> GREEK CAPITAL LETTER ETA
+ '\u0398' # 0xC8 -> GREEK CAPITAL LETTER THETA
+ '\u0399' # 0xC9 -> GREEK CAPITAL LETTER IOTA
+ '\u039a' # 0xCA -> GREEK CAPITAL LETTER KAPPA
+ '\u039b' # 0xCB -> GREEK CAPITAL LETTER LAMDA
+ '\u039c' # 0xCC -> GREEK CAPITAL LETTER MU
+ '\u039d' # 0xCD -> GREEK CAPITAL LETTER NU
+ '\u039e' # 0xCE -> GREEK CAPITAL LETTER XI
+ '\u039f' # 0xCF -> GREEK CAPITAL LETTER OMICRON
+ '\u03a0' # 0xD0 -> GREEK CAPITAL LETTER PI
+ '\u03a1' # 0xD1 -> GREEK CAPITAL LETTER RHO
+ '\ufffe' # 0xD2 -> UNDEFINED
+ '\u03a3' # 0xD3 -> GREEK CAPITAL LETTER SIGMA
+ '\u03a4' # 0xD4 -> GREEK CAPITAL LETTER TAU
+ '\u03a5' # 0xD5 -> GREEK CAPITAL LETTER UPSILON
+ '\u03a6' # 0xD6 -> GREEK CAPITAL LETTER PHI
+ '\u03a7' # 0xD7 -> GREEK CAPITAL LETTER CHI
+ '\u03a8' # 0xD8 -> GREEK CAPITAL LETTER PSI
+ '\u03a9' # 0xD9 -> GREEK CAPITAL LETTER OMEGA
+ '\u03aa' # 0xDA -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+ '\u03ab' # 0xDB -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+ '\u03ac' # 0xDC -> GREEK SMALL LETTER ALPHA WITH TONOS
+ '\u03ad' # 0xDD -> GREEK SMALL LETTER EPSILON WITH TONOS
+ '\u03ae' # 0xDE -> GREEK SMALL LETTER ETA WITH TONOS
+ '\u03af' # 0xDF -> GREEK SMALL LETTER IOTA WITH TONOS
+ '\u03b0' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+ '\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA
+ '\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA
+ '\u03b3' # 0xE3 -> GREEK SMALL LETTER GAMMA
+ '\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA
+ '\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON
+ '\u03b6' # 0xE6 -> GREEK SMALL LETTER ZETA
+ '\u03b7' # 0xE7 -> GREEK SMALL LETTER ETA
+ '\u03b8' # 0xE8 -> GREEK SMALL LETTER THETA
+ '\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA
+ '\u03ba' # 0xEA -> GREEK SMALL LETTER KAPPA
+ '\u03bb' # 0xEB -> GREEK SMALL LETTER LAMDA
+ '\u03bc' # 0xEC -> GREEK SMALL LETTER MU
+ '\u03bd' # 0xED -> GREEK SMALL LETTER NU
+ '\u03be' # 0xEE -> GREEK SMALL LETTER XI
+ '\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON
+ '\u03c0' # 0xF0 -> GREEK SMALL LETTER PI
+ '\u03c1' # 0xF1 -> GREEK SMALL LETTER RHO
+ '\u03c2' # 0xF2 -> GREEK SMALL LETTER FINAL SIGMA
+ '\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA
+ '\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU
+ '\u03c5' # 0xF5 -> GREEK SMALL LETTER UPSILON
+ '\u03c6' # 0xF6 -> GREEK SMALL LETTER PHI
+ '\u03c7' # 0xF7 -> GREEK SMALL LETTER CHI
+ '\u03c8' # 0xF8 -> GREEK SMALL LETTER PSI
+ '\u03c9' # 0xF9 -> GREEK SMALL LETTER OMEGA
+ '\u03ca' # 0xFA -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
+ '\u03cb' # 0xFB -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+ '\u03cc' # 0xFC -> GREEK SMALL LETTER OMICRON WITH TONOS
+ '\u03cd' # 0xFD -> GREEK SMALL LETTER UPSILON WITH TONOS
+ '\u03ce' # 0xFE -> GREEK SMALL LETTER OMEGA WITH TONOS
+ '\ufffe' # 0xFF -> UNDEFINED
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp1254.py b/dist/lib/encodings/cp1254.py
new file mode 100644
index 0000000..4912327
--- /dev/null
+++ b/dist/lib/encodings/cp1254.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1254 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp1254',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x04' # 0x04 -> END OF TRANSMISSION
+ '\x05' # 0x05 -> ENQUIRY
+ '\x06' # 0x06 -> ACKNOWLEDGE
+ '\x07' # 0x07 -> BELL
+ '\x08' # 0x08 -> BACKSPACE
+ '\t' # 0x09 -> HORIZONTAL TABULATION
+ '\n' # 0x0A -> LINE FEED
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x14' # 0x14 -> DEVICE CONTROL FOUR
+ '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x16 -> SYNCHRONOUS IDLE
+ '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x1a' # 0x1A -> SUBSTITUTE
+ '\x1b' # 0x1B -> ESCAPE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> DELETE
+ '\u20ac' # 0x80 -> EURO SIGN
+ '\ufffe' # 0x81 -> UNDEFINED
+ '\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
+ '\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK
+ '\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ '\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ '\u2020' # 0x86 -> DAGGER
+ '\u2021' # 0x87 -> DOUBLE DAGGER
+ '\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+ '\u2030' # 0x89 -> PER MILLE SIGN
+ '\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON
+ '\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ '\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE
+ '\ufffe' # 0x8D -> UNDEFINED
+ '\ufffe' # 0x8E -> UNDEFINED
+ '\ufffe' # 0x8F -> UNDEFINED
+ '\ufffe' # 0x90 -> UNDEFINED
+ '\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ '\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ '\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ '\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ '\u2022' # 0x95 -> BULLET
+ '\u2013' # 0x96 -> EN DASH
+ '\u2014' # 0x97 -> EM DASH
+ '\u02dc' # 0x98 -> SMALL TILDE
+ '\u2122' # 0x99 -> TRADE MARK SIGN
+ '\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON
+ '\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ '\u0153' # 0x9C -> LATIN SMALL LIGATURE OE
+ '\ufffe' # 0x9D -> UNDEFINED
+ '\ufffe' # 0x9E -> UNDEFINED
+ '\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+ '\xa0' # 0xA0 -> NO-BREAK SPACE
+ '\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK
+ '\xa2' # 0xA2 -> CENT SIGN
+ '\xa3' # 0xA3 -> POUND SIGN
+ '\xa4' # 0xA4 -> CURRENCY SIGN
+ '\xa5' # 0xA5 -> YEN SIGN
+ '\xa6' # 0xA6 -> BROKEN BAR
+ '\xa7' # 0xA7 -> SECTION SIGN
+ '\xa8' # 0xA8 -> DIAERESIS
+ '\xa9' # 0xA9 -> COPYRIGHT SIGN
+ '\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR
+ '\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xac' # 0xAC -> NOT SIGN
+ '\xad' # 0xAD -> SOFT HYPHEN
+ '\xae' # 0xAE -> REGISTERED SIGN
+ '\xaf' # 0xAF -> MACRON
+ '\xb0' # 0xB0 -> DEGREE SIGN
+ '\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ '\xb2' # 0xB2 -> SUPERSCRIPT TWO
+ '\xb3' # 0xB3 -> SUPERSCRIPT THREE
+ '\xb4' # 0xB4 -> ACUTE ACCENT
+ '\xb5' # 0xB5 -> MICRO SIGN
+ '\xb6' # 0xB6 -> PILCROW SIGN
+ '\xb7' # 0xB7 -> MIDDLE DOT
+ '\xb8' # 0xB8 -> CEDILLA
+ '\xb9' # 0xB9 -> SUPERSCRIPT ONE
+ '\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR
+ '\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
+ '\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
+ '\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
+ '\xbf' # 0xBF -> INVERTED QUESTION MARK
+ '\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
+ '\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ '\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
+ '\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE
+ '\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
+ '\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ '\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ '\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
+ '\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ '\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ '\u011e' # 0xD0 -> LATIN CAPITAL LETTER G WITH BREVE
+ '\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
+ '\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
+ '\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
+ '\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xd7' # 0xD7 -> MULTIPLICATION SIGN
+ '\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
+ '\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
+ '\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ '\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\u0130' # 0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE
+ '\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA
+ '\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
+ '\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE
+ '\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '\xe6' # 0xE6 -> LATIN SMALL LETTER AE
+ '\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+ '\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE
+ '\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
+ '\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\u011f' # 0xF0 -> LATIN SMALL LETTER G WITH BREVE
+ '\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE
+ '\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE
+ '\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+ '\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE
+ '\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\xf7' # 0xF7 -> DIVISION SIGN
+ '\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE
+ '\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+ '\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
+ '\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\u0131' # 0xFD -> LATIN SMALL LETTER DOTLESS I
+ '\u015f' # 0xFE -> LATIN SMALL LETTER S WITH CEDILLA
+ '\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp1255.py b/dist/lib/encodings/cp1255.py
new file mode 100644
index 0000000..91ce26b
--- /dev/null
+++ b/dist/lib/encodings/cp1255.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1255 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp1255',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x04' # 0x04 -> END OF TRANSMISSION
+ '\x05' # 0x05 -> ENQUIRY
+ '\x06' # 0x06 -> ACKNOWLEDGE
+ '\x07' # 0x07 -> BELL
+ '\x08' # 0x08 -> BACKSPACE
+ '\t' # 0x09 -> HORIZONTAL TABULATION
+ '\n' # 0x0A -> LINE FEED
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x14' # 0x14 -> DEVICE CONTROL FOUR
+ '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x16 -> SYNCHRONOUS IDLE
+ '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x1a' # 0x1A -> SUBSTITUTE
+ '\x1b' # 0x1B -> ESCAPE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> DELETE
+ '\u20ac' # 0x80 -> EURO SIGN
+ '\ufffe' # 0x81 -> UNDEFINED
+ '\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
+ '\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK
+ '\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ '\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ '\u2020' # 0x86 -> DAGGER
+ '\u2021' # 0x87 -> DOUBLE DAGGER
+ '\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+ '\u2030' # 0x89 -> PER MILLE SIGN
+ '\ufffe' # 0x8A -> UNDEFINED
+ '\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ '\ufffe' # 0x8C -> UNDEFINED
+ '\ufffe' # 0x8D -> UNDEFINED
+ '\ufffe' # 0x8E -> UNDEFINED
+ '\ufffe' # 0x8F -> UNDEFINED
+ '\ufffe' # 0x90 -> UNDEFINED
+ '\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ '\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ '\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ '\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ '\u2022' # 0x95 -> BULLET
+ '\u2013' # 0x96 -> EN DASH
+ '\u2014' # 0x97 -> EM DASH
+ '\u02dc' # 0x98 -> SMALL TILDE
+ '\u2122' # 0x99 -> TRADE MARK SIGN
+ '\ufffe' # 0x9A -> UNDEFINED
+ '\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ '\ufffe' # 0x9C -> UNDEFINED
+ '\ufffe' # 0x9D -> UNDEFINED
+ '\ufffe' # 0x9E -> UNDEFINED
+ '\ufffe' # 0x9F -> UNDEFINED
+ '\xa0' # 0xA0 -> NO-BREAK SPACE
+ '\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK
+ '\xa2' # 0xA2 -> CENT SIGN
+ '\xa3' # 0xA3 -> POUND SIGN
+ '\u20aa' # 0xA4 -> NEW SHEQEL SIGN
+ '\xa5' # 0xA5 -> YEN SIGN
+ '\xa6' # 0xA6 -> BROKEN BAR
+ '\xa7' # 0xA7 -> SECTION SIGN
+ '\xa8' # 0xA8 -> DIAERESIS
+ '\xa9' # 0xA9 -> COPYRIGHT SIGN
+ '\xd7' # 0xAA -> MULTIPLICATION SIGN
+ '\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xac' # 0xAC -> NOT SIGN
+ '\xad' # 0xAD -> SOFT HYPHEN
+ '\xae' # 0xAE -> REGISTERED SIGN
+ '\xaf' # 0xAF -> MACRON
+ '\xb0' # 0xB0 -> DEGREE SIGN
+ '\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ '\xb2' # 0xB2 -> SUPERSCRIPT TWO
+ '\xb3' # 0xB3 -> SUPERSCRIPT THREE
+ '\xb4' # 0xB4 -> ACUTE ACCENT
+ '\xb5' # 0xB5 -> MICRO SIGN
+ '\xb6' # 0xB6 -> PILCROW SIGN
+ '\xb7' # 0xB7 -> MIDDLE DOT
+ '\xb8' # 0xB8 -> CEDILLA
+ '\xb9' # 0xB9 -> SUPERSCRIPT ONE
+ '\xf7' # 0xBA -> DIVISION SIGN
+ '\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
+ '\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
+ '\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
+ '\xbf' # 0xBF -> INVERTED QUESTION MARK
+ '\u05b0' # 0xC0 -> HEBREW POINT SHEVA
+ '\u05b1' # 0xC1 -> HEBREW POINT HATAF SEGOL
+ '\u05b2' # 0xC2 -> HEBREW POINT HATAF PATAH
+ '\u05b3' # 0xC3 -> HEBREW POINT HATAF QAMATS
+ '\u05b4' # 0xC4 -> HEBREW POINT HIRIQ
+ '\u05b5' # 0xC5 -> HEBREW POINT TSERE
+ '\u05b6' # 0xC6 -> HEBREW POINT SEGOL
+ '\u05b7' # 0xC7 -> HEBREW POINT PATAH
+ '\u05b8' # 0xC8 -> HEBREW POINT QAMATS
+ '\u05b9' # 0xC9 -> HEBREW POINT HOLAM
+ '\ufffe' # 0xCA -> UNDEFINED
+ '\u05bb' # 0xCB -> HEBREW POINT QUBUTS
+ '\u05bc' # 0xCC -> HEBREW POINT DAGESH OR MAPIQ
+ '\u05bd' # 0xCD -> HEBREW POINT METEG
+ '\u05be' # 0xCE -> HEBREW PUNCTUATION MAQAF
+ '\u05bf' # 0xCF -> HEBREW POINT RAFE
+ '\u05c0' # 0xD0 -> HEBREW PUNCTUATION PASEQ
+ '\u05c1' # 0xD1 -> HEBREW POINT SHIN DOT
+ '\u05c2' # 0xD2 -> HEBREW POINT SIN DOT
+ '\u05c3' # 0xD3 -> HEBREW PUNCTUATION SOF PASUQ
+ '\u05f0' # 0xD4 -> HEBREW LIGATURE YIDDISH DOUBLE VAV
+ '\u05f1' # 0xD5 -> HEBREW LIGATURE YIDDISH VAV YOD
+ '\u05f2' # 0xD6 -> HEBREW LIGATURE YIDDISH DOUBLE YOD
+ '\u05f3' # 0xD7 -> HEBREW PUNCTUATION GERESH
+ '\u05f4' # 0xD8 -> HEBREW PUNCTUATION GERSHAYIM
+ '\ufffe' # 0xD9 -> UNDEFINED
+ '\ufffe' # 0xDA -> UNDEFINED
+ '\ufffe' # 0xDB -> UNDEFINED
+ '\ufffe' # 0xDC -> UNDEFINED
+ '\ufffe' # 0xDD -> UNDEFINED
+ '\ufffe' # 0xDE -> UNDEFINED
+ '\ufffe' # 0xDF -> UNDEFINED
+ '\u05d0' # 0xE0 -> HEBREW LETTER ALEF
+ '\u05d1' # 0xE1 -> HEBREW LETTER BET
+ '\u05d2' # 0xE2 -> HEBREW LETTER GIMEL
+ '\u05d3' # 0xE3 -> HEBREW LETTER DALET
+ '\u05d4' # 0xE4 -> HEBREW LETTER HE
+ '\u05d5' # 0xE5 -> HEBREW LETTER VAV
+ '\u05d6' # 0xE6 -> HEBREW LETTER ZAYIN
+ '\u05d7' # 0xE7 -> HEBREW LETTER HET
+ '\u05d8' # 0xE8 -> HEBREW LETTER TET
+ '\u05d9' # 0xE9 -> HEBREW LETTER YOD
+ '\u05da' # 0xEA -> HEBREW LETTER FINAL KAF
+ '\u05db' # 0xEB -> HEBREW LETTER KAF
+ '\u05dc' # 0xEC -> HEBREW LETTER LAMED
+ '\u05dd' # 0xED -> HEBREW LETTER FINAL MEM
+ '\u05de' # 0xEE -> HEBREW LETTER MEM
+ '\u05df' # 0xEF -> HEBREW LETTER FINAL NUN
+ '\u05e0' # 0xF0 -> HEBREW LETTER NUN
+ '\u05e1' # 0xF1 -> HEBREW LETTER SAMEKH
+ '\u05e2' # 0xF2 -> HEBREW LETTER AYIN
+ '\u05e3' # 0xF3 -> HEBREW LETTER FINAL PE
+ '\u05e4' # 0xF4 -> HEBREW LETTER PE
+ '\u05e5' # 0xF5 -> HEBREW LETTER FINAL TSADI
+ '\u05e6' # 0xF6 -> HEBREW LETTER TSADI
+ '\u05e7' # 0xF7 -> HEBREW LETTER QOF
+ '\u05e8' # 0xF8 -> HEBREW LETTER RESH
+ '\u05e9' # 0xF9 -> HEBREW LETTER SHIN
+ '\u05ea' # 0xFA -> HEBREW LETTER TAV
+ '\ufffe' # 0xFB -> UNDEFINED
+ '\ufffe' # 0xFC -> UNDEFINED
+ '\u200e' # 0xFD -> LEFT-TO-RIGHT MARK
+ '\u200f' # 0xFE -> RIGHT-TO-LEFT MARK
+ '\ufffe' # 0xFF -> UNDEFINED
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp1256.py b/dist/lib/encodings/cp1256.py
new file mode 100644
index 0000000..fd6afab
--- /dev/null
+++ b/dist/lib/encodings/cp1256.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1256 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp1256',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x04' # 0x04 -> END OF TRANSMISSION
+ '\x05' # 0x05 -> ENQUIRY
+ '\x06' # 0x06 -> ACKNOWLEDGE
+ '\x07' # 0x07 -> BELL
+ '\x08' # 0x08 -> BACKSPACE
+ '\t' # 0x09 -> HORIZONTAL TABULATION
+ '\n' # 0x0A -> LINE FEED
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x14' # 0x14 -> DEVICE CONTROL FOUR
+ '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x16 -> SYNCHRONOUS IDLE
+ '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x1a' # 0x1A -> SUBSTITUTE
+ '\x1b' # 0x1B -> ESCAPE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> DELETE
+ '\u20ac' # 0x80 -> EURO SIGN
+ '\u067e' # 0x81 -> ARABIC LETTER PEH
+ '\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
+ '\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK
+ '\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ '\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ '\u2020' # 0x86 -> DAGGER
+ '\u2021' # 0x87 -> DOUBLE DAGGER
+ '\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+ '\u2030' # 0x89 -> PER MILLE SIGN
+ '\u0679' # 0x8A -> ARABIC LETTER TTEH
+ '\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ '\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE
+ '\u0686' # 0x8D -> ARABIC LETTER TCHEH
+ '\u0698' # 0x8E -> ARABIC LETTER JEH
+ '\u0688' # 0x8F -> ARABIC LETTER DDAL
+ '\u06af' # 0x90 -> ARABIC LETTER GAF
+ '\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ '\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ '\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ '\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ '\u2022' # 0x95 -> BULLET
+ '\u2013' # 0x96 -> EN DASH
+ '\u2014' # 0x97 -> EM DASH
+ '\u06a9' # 0x98 -> ARABIC LETTER KEHEH
+ '\u2122' # 0x99 -> TRADE MARK SIGN
+ '\u0691' # 0x9A -> ARABIC LETTER RREH
+ '\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ '\u0153' # 0x9C -> LATIN SMALL LIGATURE OE
+ '\u200c' # 0x9D -> ZERO WIDTH NON-JOINER
+ '\u200d' # 0x9E -> ZERO WIDTH JOINER
+ '\u06ba' # 0x9F -> ARABIC LETTER NOON GHUNNA
+ '\xa0' # 0xA0 -> NO-BREAK SPACE
+ '\u060c' # 0xA1 -> ARABIC COMMA
+ '\xa2' # 0xA2 -> CENT SIGN
+ '\xa3' # 0xA3 -> POUND SIGN
+ '\xa4' # 0xA4 -> CURRENCY SIGN
+ '\xa5' # 0xA5 -> YEN SIGN
+ '\xa6' # 0xA6 -> BROKEN BAR
+ '\xa7' # 0xA7 -> SECTION SIGN
+ '\xa8' # 0xA8 -> DIAERESIS
+ '\xa9' # 0xA9 -> COPYRIGHT SIGN
+ '\u06be' # 0xAA -> ARABIC LETTER HEH DOACHASHMEE
+ '\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xac' # 0xAC -> NOT SIGN
+ '\xad' # 0xAD -> SOFT HYPHEN
+ '\xae' # 0xAE -> REGISTERED SIGN
+ '\xaf' # 0xAF -> MACRON
+ '\xb0' # 0xB0 -> DEGREE SIGN
+ '\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ '\xb2' # 0xB2 -> SUPERSCRIPT TWO
+ '\xb3' # 0xB3 -> SUPERSCRIPT THREE
+ '\xb4' # 0xB4 -> ACUTE ACCENT
+ '\xb5' # 0xB5 -> MICRO SIGN
+ '\xb6' # 0xB6 -> PILCROW SIGN
+ '\xb7' # 0xB7 -> MIDDLE DOT
+ '\xb8' # 0xB8 -> CEDILLA
+ '\xb9' # 0xB9 -> SUPERSCRIPT ONE
+ '\u061b' # 0xBA -> ARABIC SEMICOLON
+ '\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
+ '\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
+ '\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
+ '\u061f' # 0xBF -> ARABIC QUESTION MARK
+ '\u06c1' # 0xC0 -> ARABIC LETTER HEH GOAL
+ '\u0621' # 0xC1 -> ARABIC LETTER HAMZA
+ '\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE
+ '\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE
+ '\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE
+ '\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW
+ '\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE
+ '\u0627' # 0xC7 -> ARABIC LETTER ALEF
+ '\u0628' # 0xC8 -> ARABIC LETTER BEH
+ '\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA
+ '\u062a' # 0xCA -> ARABIC LETTER TEH
+ '\u062b' # 0xCB -> ARABIC LETTER THEH
+ '\u062c' # 0xCC -> ARABIC LETTER JEEM
+ '\u062d' # 0xCD -> ARABIC LETTER HAH
+ '\u062e' # 0xCE -> ARABIC LETTER KHAH
+ '\u062f' # 0xCF -> ARABIC LETTER DAL
+ '\u0630' # 0xD0 -> ARABIC LETTER THAL
+ '\u0631' # 0xD1 -> ARABIC LETTER REH
+ '\u0632' # 0xD2 -> ARABIC LETTER ZAIN
+ '\u0633' # 0xD3 -> ARABIC LETTER SEEN
+ '\u0634' # 0xD4 -> ARABIC LETTER SHEEN
+ '\u0635' # 0xD5 -> ARABIC LETTER SAD
+ '\u0636' # 0xD6 -> ARABIC LETTER DAD
+ '\xd7' # 0xD7 -> MULTIPLICATION SIGN
+ '\u0637' # 0xD8 -> ARABIC LETTER TAH
+ '\u0638' # 0xD9 -> ARABIC LETTER ZAH
+ '\u0639' # 0xDA -> ARABIC LETTER AIN
+ '\u063a' # 0xDB -> ARABIC LETTER GHAIN
+ '\u0640' # 0xDC -> ARABIC TATWEEL
+ '\u0641' # 0xDD -> ARABIC LETTER FEH
+ '\u0642' # 0xDE -> ARABIC LETTER QAF
+ '\u0643' # 0xDF -> ARABIC LETTER KAF
+ '\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+ '\u0644' # 0xE1 -> ARABIC LETTER LAM
+ '\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\u0645' # 0xE3 -> ARABIC LETTER MEEM
+ '\u0646' # 0xE4 -> ARABIC LETTER NOON
+ '\u0647' # 0xE5 -> ARABIC LETTER HEH
+ '\u0648' # 0xE6 -> ARABIC LETTER WAW
+ '\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+ '\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\u0649' # 0xEC -> ARABIC LETTER ALEF MAKSURA
+ '\u064a' # 0xED -> ARABIC LETTER YEH
+ '\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\u064b' # 0xF0 -> ARABIC FATHATAN
+ '\u064c' # 0xF1 -> ARABIC DAMMATAN
+ '\u064d' # 0xF2 -> ARABIC KASRATAN
+ '\u064e' # 0xF3 -> ARABIC FATHA
+ '\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\u064f' # 0xF5 -> ARABIC DAMMA
+ '\u0650' # 0xF6 -> ARABIC KASRA
+ '\xf7' # 0xF7 -> DIVISION SIGN
+ '\u0651' # 0xF8 -> ARABIC SHADDA
+ '\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+ '\u0652' # 0xFA -> ARABIC SUKUN
+ '\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\u200e' # 0xFD -> LEFT-TO-RIGHT MARK
+ '\u200f' # 0xFE -> RIGHT-TO-LEFT MARK
+ '\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp1257.py b/dist/lib/encodings/cp1257.py
new file mode 100644
index 0000000..9ebc90d
--- /dev/null
+++ b/dist/lib/encodings/cp1257.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1257 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp1257',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x04' # 0x04 -> END OF TRANSMISSION
+ '\x05' # 0x05 -> ENQUIRY
+ '\x06' # 0x06 -> ACKNOWLEDGE
+ '\x07' # 0x07 -> BELL
+ '\x08' # 0x08 -> BACKSPACE
+ '\t' # 0x09 -> HORIZONTAL TABULATION
+ '\n' # 0x0A -> LINE FEED
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x14' # 0x14 -> DEVICE CONTROL FOUR
+ '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x16 -> SYNCHRONOUS IDLE
+ '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x1a' # 0x1A -> SUBSTITUTE
+ '\x1b' # 0x1B -> ESCAPE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> DELETE
+ '\u20ac' # 0x80 -> EURO SIGN
+ '\ufffe' # 0x81 -> UNDEFINED
+ '\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
+ '\ufffe' # 0x83 -> UNDEFINED
+ '\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ '\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ '\u2020' # 0x86 -> DAGGER
+ '\u2021' # 0x87 -> DOUBLE DAGGER
+ '\ufffe' # 0x88 -> UNDEFINED
+ '\u2030' # 0x89 -> PER MILLE SIGN
+ '\ufffe' # 0x8A -> UNDEFINED
+ '\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ '\ufffe' # 0x8C -> UNDEFINED
+ '\xa8' # 0x8D -> DIAERESIS
+ '\u02c7' # 0x8E -> CARON
+ '\xb8' # 0x8F -> CEDILLA
+ '\ufffe' # 0x90 -> UNDEFINED
+ '\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ '\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ '\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ '\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ '\u2022' # 0x95 -> BULLET
+ '\u2013' # 0x96 -> EN DASH
+ '\u2014' # 0x97 -> EM DASH
+ '\ufffe' # 0x98 -> UNDEFINED
+ '\u2122' # 0x99 -> TRADE MARK SIGN
+ '\ufffe' # 0x9A -> UNDEFINED
+ '\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ '\ufffe' # 0x9C -> UNDEFINED
+ '\xaf' # 0x9D -> MACRON
+ '\u02db' # 0x9E -> OGONEK
+ '\ufffe' # 0x9F -> UNDEFINED
+ '\xa0' # 0xA0 -> NO-BREAK SPACE
+ '\ufffe' # 0xA1 -> UNDEFINED
+ '\xa2' # 0xA2 -> CENT SIGN
+ '\xa3' # 0xA3 -> POUND SIGN
+ '\xa4' # 0xA4 -> CURRENCY SIGN
+ '\ufffe' # 0xA5 -> UNDEFINED
+ '\xa6' # 0xA6 -> BROKEN BAR
+ '\xa7' # 0xA7 -> SECTION SIGN
+ '\xd8' # 0xA8 -> LATIN CAPITAL LETTER O WITH STROKE
+ '\xa9' # 0xA9 -> COPYRIGHT SIGN
+ '\u0156' # 0xAA -> LATIN CAPITAL LETTER R WITH CEDILLA
+ '\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xac' # 0xAC -> NOT SIGN
+ '\xad' # 0xAD -> SOFT HYPHEN
+ '\xae' # 0xAE -> REGISTERED SIGN
+ '\xc6' # 0xAF -> LATIN CAPITAL LETTER AE
+ '\xb0' # 0xB0 -> DEGREE SIGN
+ '\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ '\xb2' # 0xB2 -> SUPERSCRIPT TWO
+ '\xb3' # 0xB3 -> SUPERSCRIPT THREE
+ '\xb4' # 0xB4 -> ACUTE ACCENT
+ '\xb5' # 0xB5 -> MICRO SIGN
+ '\xb6' # 0xB6 -> PILCROW SIGN
+ '\xb7' # 0xB7 -> MIDDLE DOT
+ '\xf8' # 0xB8 -> LATIN SMALL LETTER O WITH STROKE
+ '\xb9' # 0xB9 -> SUPERSCRIPT ONE
+ '\u0157' # 0xBA -> LATIN SMALL LETTER R WITH CEDILLA
+ '\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
+ '\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
+ '\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
+ '\xe6' # 0xBF -> LATIN SMALL LETTER AE
+ '\u0104' # 0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK
+ '\u012e' # 0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK
+ '\u0100' # 0xC2 -> LATIN CAPITAL LETTER A WITH MACRON
+ '\u0106' # 0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE
+ '\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '\u0118' # 0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK
+ '\u0112' # 0xC7 -> LATIN CAPITAL LETTER E WITH MACRON
+ '\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON
+ '\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\u0179' # 0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE
+ '\u0116' # 0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE
+ '\u0122' # 0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA
+ '\u0136' # 0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA
+ '\u012a' # 0xCE -> LATIN CAPITAL LETTER I WITH MACRON
+ '\u013b' # 0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA
+ '\u0160' # 0xD0 -> LATIN CAPITAL LETTER S WITH CARON
+ '\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE
+ '\u0145' # 0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA
+ '\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\u014c' # 0xD4 -> LATIN CAPITAL LETTER O WITH MACRON
+ '\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
+ '\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xd7' # 0xD7 -> MULTIPLICATION SIGN
+ '\u0172' # 0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK
+ '\u0141' # 0xD9 -> LATIN CAPITAL LETTER L WITH STROKE
+ '\u015a' # 0xDA -> LATIN CAPITAL LETTER S WITH ACUTE
+ '\u016a' # 0xDB -> LATIN CAPITAL LETTER U WITH MACRON
+ '\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\u017b' # 0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
+ '\u017d' # 0xDE -> LATIN CAPITAL LETTER Z WITH CARON
+ '\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
+ '\u0105' # 0xE0 -> LATIN SMALL LETTER A WITH OGONEK
+ '\u012f' # 0xE1 -> LATIN SMALL LETTER I WITH OGONEK
+ '\u0101' # 0xE2 -> LATIN SMALL LETTER A WITH MACRON
+ '\u0107' # 0xE3 -> LATIN SMALL LETTER C WITH ACUTE
+ '\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '\u0119' # 0xE6 -> LATIN SMALL LETTER E WITH OGONEK
+ '\u0113' # 0xE7 -> LATIN SMALL LETTER E WITH MACRON
+ '\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON
+ '\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+ '\u017a' # 0xEA -> LATIN SMALL LETTER Z WITH ACUTE
+ '\u0117' # 0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE
+ '\u0123' # 0xEC -> LATIN SMALL LETTER G WITH CEDILLA
+ '\u0137' # 0xED -> LATIN SMALL LETTER K WITH CEDILLA
+ '\u012b' # 0xEE -> LATIN SMALL LETTER I WITH MACRON
+ '\u013c' # 0xEF -> LATIN SMALL LETTER L WITH CEDILLA
+ '\u0161' # 0xF0 -> LATIN SMALL LETTER S WITH CARON
+ '\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE
+ '\u0146' # 0xF2 -> LATIN SMALL LETTER N WITH CEDILLA
+ '\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+ '\u014d' # 0xF4 -> LATIN SMALL LETTER O WITH MACRON
+ '\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE
+ '\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\xf7' # 0xF7 -> DIVISION SIGN
+ '\u0173' # 0xF8 -> LATIN SMALL LETTER U WITH OGONEK
+ '\u0142' # 0xF9 -> LATIN SMALL LETTER L WITH STROKE
+ '\u015b' # 0xFA -> LATIN SMALL LETTER S WITH ACUTE
+ '\u016b' # 0xFB -> LATIN SMALL LETTER U WITH MACRON
+ '\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE
+ '\u017e' # 0xFE -> LATIN SMALL LETTER Z WITH CARON
+ '\u02d9' # 0xFF -> DOT ABOVE
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp1258.py b/dist/lib/encodings/cp1258.py
new file mode 100644
index 0000000..784378a
--- /dev/null
+++ b/dist/lib/encodings/cp1258.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp1258 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp1258',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x04' # 0x04 -> END OF TRANSMISSION
+ '\x05' # 0x05 -> ENQUIRY
+ '\x06' # 0x06 -> ACKNOWLEDGE
+ '\x07' # 0x07 -> BELL
+ '\x08' # 0x08 -> BACKSPACE
+ '\t' # 0x09 -> HORIZONTAL TABULATION
+ '\n' # 0x0A -> LINE FEED
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x14' # 0x14 -> DEVICE CONTROL FOUR
+ '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x16 -> SYNCHRONOUS IDLE
+ '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x1a' # 0x1A -> SUBSTITUTE
+ '\x1b' # 0x1B -> ESCAPE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> DELETE
+ '\u20ac' # 0x80 -> EURO SIGN
+ '\ufffe' # 0x81 -> UNDEFINED
+ '\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK
+ '\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK
+ '\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK
+ '\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ '\u2020' # 0x86 -> DAGGER
+ '\u2021' # 0x87 -> DOUBLE DAGGER
+ '\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+ '\u2030' # 0x89 -> PER MILLE SIGN
+ '\ufffe' # 0x8A -> UNDEFINED
+ '\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ '\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE
+ '\ufffe' # 0x8D -> UNDEFINED
+ '\ufffe' # 0x8E -> UNDEFINED
+ '\ufffe' # 0x8F -> UNDEFINED
+ '\ufffe' # 0x90 -> UNDEFINED
+ '\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ '\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ '\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ '\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ '\u2022' # 0x95 -> BULLET
+ '\u2013' # 0x96 -> EN DASH
+ '\u2014' # 0x97 -> EM DASH
+ '\u02dc' # 0x98 -> SMALL TILDE
+ '\u2122' # 0x99 -> TRADE MARK SIGN
+ '\ufffe' # 0x9A -> UNDEFINED
+ '\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ '\u0153' # 0x9C -> LATIN SMALL LIGATURE OE
+ '\ufffe' # 0x9D -> UNDEFINED
+ '\ufffe' # 0x9E -> UNDEFINED
+ '\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+ '\xa0' # 0xA0 -> NO-BREAK SPACE
+ '\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK
+ '\xa2' # 0xA2 -> CENT SIGN
+ '\xa3' # 0xA3 -> POUND SIGN
+ '\xa4' # 0xA4 -> CURRENCY SIGN
+ '\xa5' # 0xA5 -> YEN SIGN
+ '\xa6' # 0xA6 -> BROKEN BAR
+ '\xa7' # 0xA7 -> SECTION SIGN
+ '\xa8' # 0xA8 -> DIAERESIS
+ '\xa9' # 0xA9 -> COPYRIGHT SIGN
+ '\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR
+ '\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xac' # 0xAC -> NOT SIGN
+ '\xad' # 0xAD -> SOFT HYPHEN
+ '\xae' # 0xAE -> REGISTERED SIGN
+ '\xaf' # 0xAF -> MACRON
+ '\xb0' # 0xB0 -> DEGREE SIGN
+ '\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ '\xb2' # 0xB2 -> SUPERSCRIPT TWO
+ '\xb3' # 0xB3 -> SUPERSCRIPT THREE
+ '\xb4' # 0xB4 -> ACUTE ACCENT
+ '\xb5' # 0xB5 -> MICRO SIGN
+ '\xb6' # 0xB6 -> PILCROW SIGN
+ '\xb7' # 0xB7 -> MIDDLE DOT
+ '\xb8' # 0xB8 -> CEDILLA
+ '\xb9' # 0xB9 -> SUPERSCRIPT ONE
+ '\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR
+ '\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
+ '\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
+ '\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
+ '\xbf' # 0xBF -> INVERTED QUESTION MARK
+ '\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
+ '\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ '\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE
+ '\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE
+ '\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
+ '\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ '\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ '\u0300' # 0xCC -> COMBINING GRAVE ACCENT
+ '\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ '\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ '\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
+ '\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
+ '\u0309' # 0xD2 -> COMBINING HOOK ABOVE
+ '\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '\u01a0' # 0xD5 -> LATIN CAPITAL LETTER O WITH HORN
+ '\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xd7' # 0xD7 -> MULTIPLICATION SIGN
+ '\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
+ '\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
+ '\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ '\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\u01af' # 0xDD -> LATIN CAPITAL LETTER U WITH HORN
+ '\u0303' # 0xDE -> COMBINING TILDE
+ '\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S
+ '\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE
+ '\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '\xe6' # 0xE6 -> LATIN SMALL LETTER AE
+ '\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+ '\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\u0301' # 0xEC -> COMBINING ACUTE ACCENT
+ '\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
+ '\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE
+ '\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE
+ '\u0323' # 0xF2 -> COMBINING DOT BELOW
+ '\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+ '\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\u01a1' # 0xF5 -> LATIN SMALL LETTER O WITH HORN
+ '\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\xf7' # 0xF7 -> DIVISION SIGN
+ '\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE
+ '\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+ '\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
+ '\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\u01b0' # 0xFD -> LATIN SMALL LETTER U WITH HORN
+ '\u20ab' # 0xFE -> DONG SIGN
+ '\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp273.py b/dist/lib/encodings/cp273.py
new file mode 100644
index 0000000..69c6d77
--- /dev/null
+++ b/dist/lib/encodings/cp273.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp273 generated from 'python-mappings/CP273.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp273',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL (NUL)
+ '\x01' # 0x01 -> START OF HEADING (SOH)
+ '\x02' # 0x02 -> START OF TEXT (STX)
+ '\x03' # 0x03 -> END OF TEXT (ETX)
+ '\x9c' # 0x04 -> STRING TERMINATOR (ST)
+ '\t' # 0x05 -> CHARACTER TABULATION (HT)
+ '\x86' # 0x06 -> START OF SELECTED AREA (SSA)
+ '\x7f' # 0x07 -> DELETE (DEL)
+ '\x97' # 0x08 -> END OF GUARDED AREA (EPA)
+ '\x8d' # 0x09 -> REVERSE LINE FEED (RI)
+ '\x8e' # 0x0A -> SINGLE-SHIFT TWO (SS2)
+ '\x0b' # 0x0B -> LINE TABULATION (VT)
+ '\x0c' # 0x0C -> FORM FEED (FF)
+ '\r' # 0x0D -> CARRIAGE RETURN (CR)
+ '\x0e' # 0x0E -> SHIFT OUT (SO)
+ '\x0f' # 0x0F -> SHIFT IN (SI)
+ '\x10' # 0x10 -> DATALINK ESCAPE (DLE)
+ '\x11' # 0x11 -> DEVICE CONTROL ONE (DC1)
+ '\x12' # 0x12 -> DEVICE CONTROL TWO (DC2)
+ '\x13' # 0x13 -> DEVICE CONTROL THREE (DC3)
+ '\x9d' # 0x14 -> OPERATING SYSTEM COMMAND (OSC)
+ '\x85' # 0x15 -> NEXT LINE (NEL)
+ '\x08' # 0x16 -> BACKSPACE (BS)
+ '\x87' # 0x17 -> END OF SELECTED AREA (ESA)
+ '\x18' # 0x18 -> CANCEL (CAN)
+ '\x19' # 0x19 -> END OF MEDIUM (EM)
+ '\x92' # 0x1A -> PRIVATE USE TWO (PU2)
+ '\x8f' # 0x1B -> SINGLE-SHIFT THREE (SS3)
+ '\x1c' # 0x1C -> FILE SEPARATOR (IS4)
+ '\x1d' # 0x1D -> GROUP SEPARATOR (IS3)
+ '\x1e' # 0x1E -> RECORD SEPARATOR (IS2)
+ '\x1f' # 0x1F -> UNIT SEPARATOR (IS1)
+ '\x80' # 0x20 -> PADDING CHARACTER (PAD)
+ '\x81' # 0x21 -> HIGH OCTET PRESET (HOP)
+ '\x82' # 0x22 -> BREAK PERMITTED HERE (BPH)
+ '\x83' # 0x23 -> NO BREAK HERE (NBH)
+ '\x84' # 0x24 -> INDEX (IND)
+ '\n' # 0x25 -> LINE FEED (LF)
+ '\x17' # 0x26 -> END OF TRANSMISSION BLOCK (ETB)
+ '\x1b' # 0x27 -> ESCAPE (ESC)
+ '\x88' # 0x28 -> CHARACTER TABULATION SET (HTS)
+ '\x89' # 0x29 -> CHARACTER TABULATION WITH JUSTIFICATION (HTJ)
+ '\x8a' # 0x2A -> LINE TABULATION SET (VTS)
+ '\x8b' # 0x2B -> PARTIAL LINE FORWARD (PLD)
+ '\x8c' # 0x2C -> PARTIAL LINE BACKWARD (PLU)
+ '\x05' # 0x2D -> ENQUIRY (ENQ)
+ '\x06' # 0x2E -> ACKNOWLEDGE (ACK)
+ '\x07' # 0x2F -> BELL (BEL)
+ '\x90' # 0x30 -> DEVICE CONTROL STRING (DCS)
+ '\x91' # 0x31 -> PRIVATE USE ONE (PU1)
+ '\x16' # 0x32 -> SYNCHRONOUS IDLE (SYN)
+ '\x93' # 0x33 -> SET TRANSMIT STATE (STS)
+ '\x94' # 0x34 -> CANCEL CHARACTER (CCH)
+ '\x95' # 0x35 -> MESSAGE WAITING (MW)
+ '\x96' # 0x36 -> START OF GUARDED AREA (SPA)
+ '\x04' # 0x37 -> END OF TRANSMISSION (EOT)
+ '\x98' # 0x38 -> START OF STRING (SOS)
+ '\x99' # 0x39 -> SINGLE GRAPHIC CHARACTER INTRODUCER (SGCI)
+ '\x9a' # 0x3A -> SINGLE CHARACTER INTRODUCER (SCI)
+ '\x9b' # 0x3B -> CONTROL SEQUENCE INTRODUCER (CSI)
+ '\x14' # 0x3C -> DEVICE CONTROL FOUR (DC4)
+ '\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE (NAK)
+ '\x9e' # 0x3E -> PRIVACY MESSAGE (PM)
+ '\x1a' # 0x3F -> SUBSTITUTE (SUB)
+ ' ' # 0x40 -> SPACE
+ '\xa0' # 0x41 -> NO-BREAK SPACE
+ '\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '{' # 0x43 -> LEFT CURLY BRACKET
+ '\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE
+ '\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE
+ '\xc4' # 0x4A -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '.' # 0x4B -> FULL STOP
+ '<' # 0x4C -> LESS-THAN SIGN
+ '(' # 0x4D -> LEFT PARENTHESIS
+ '+' # 0x4E -> PLUS SIGN
+ '!' # 0x4F -> EXCLAMATION MARK
+ '&' # 0x50 -> AMPERSAND
+ '\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE
+ '\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE
+ '\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE
+ '~' # 0x59 -> TILDE
+ '\xdc' # 0x5A -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '$' # 0x5B -> DOLLAR SIGN
+ '*' # 0x5C -> ASTERISK
+ ')' # 0x5D -> RIGHT PARENTHESIS
+ ';' # 0x5E -> SEMICOLON
+ '^' # 0x5F -> CIRCUMFLEX ACCENT
+ '-' # 0x60 -> HYPHEN-MINUS
+ '/' # 0x61 -> SOLIDUS
+ '\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ '[' # 0x63 -> LEFT SQUARE BRACKET
+ '\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
+ '\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE
+ '\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE
+ '\xf6' # 0x6A -> LATIN SMALL LETTER O WITH DIAERESIS
+ ',' # 0x6B -> COMMA
+ '%' # 0x6C -> PERCENT SIGN
+ '_' # 0x6D -> LOW LINE
+ '>' # 0x6E -> GREATER-THAN SIGN
+ '?' # 0x6F -> QUESTION MARK
+ '\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE
+ '\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ '\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ '\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
+ '\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ '\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ '\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
+ '`' # 0x79 -> GRAVE ACCENT
+ ':' # 0x7A -> COLON
+ '#' # 0x7B -> NUMBER SIGN
+ '\xa7' # 0x7C -> SECTION SIGN
+ "'" # 0x7D -> APOSTROPHE
+ '=' # 0x7E -> EQUALS SIGN
+ '"' # 0x7F -> QUOTATION MARK
+ '\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE
+ 'a' # 0x81 -> LATIN SMALL LETTER A
+ 'b' # 0x82 -> LATIN SMALL LETTER B
+ 'c' # 0x83 -> LATIN SMALL LETTER C
+ 'd' # 0x84 -> LATIN SMALL LETTER D
+ 'e' # 0x85 -> LATIN SMALL LETTER E
+ 'f' # 0x86 -> LATIN SMALL LETTER F
+ 'g' # 0x87 -> LATIN SMALL LETTER G
+ 'h' # 0x88 -> LATIN SMALL LETTER H
+ 'i' # 0x89 -> LATIN SMALL LETTER I
+ '\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xf0' # 0x8C -> LATIN SMALL LETTER ETH (Icelandic)
+ '\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE
+ '\xfe' # 0x8E -> LATIN SMALL LETTER THORN (Icelandic)
+ '\xb1' # 0x8F -> PLUS-MINUS SIGN
+ '\xb0' # 0x90 -> DEGREE SIGN
+ 'j' # 0x91 -> LATIN SMALL LETTER J
+ 'k' # 0x92 -> LATIN SMALL LETTER K
+ 'l' # 0x93 -> LATIN SMALL LETTER L
+ 'm' # 0x94 -> LATIN SMALL LETTER M
+ 'n' # 0x95 -> LATIN SMALL LETTER N
+ 'o' # 0x96 -> LATIN SMALL LETTER O
+ 'p' # 0x97 -> LATIN SMALL LETTER P
+ 'q' # 0x98 -> LATIN SMALL LETTER Q
+ 'r' # 0x99 -> LATIN SMALL LETTER R
+ '\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR
+ '\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR
+ '\xe6' # 0x9C -> LATIN SMALL LETTER AE
+ '\xb8' # 0x9D -> CEDILLA
+ '\xc6' # 0x9E -> LATIN CAPITAL LETTER AE
+ '\xa4' # 0x9F -> CURRENCY SIGN
+ '\xb5' # 0xA0 -> MICRO SIGN
+ '\xdf' # 0xA1 -> LATIN SMALL LETTER SHARP S (German)
+ 's' # 0xA2 -> LATIN SMALL LETTER S
+ 't' # 0xA3 -> LATIN SMALL LETTER T
+ 'u' # 0xA4 -> LATIN SMALL LETTER U
+ 'v' # 0xA5 -> LATIN SMALL LETTER V
+ 'w' # 0xA6 -> LATIN SMALL LETTER W
+ 'x' # 0xA7 -> LATIN SMALL LETTER X
+ 'y' # 0xA8 -> LATIN SMALL LETTER Y
+ 'z' # 0xA9 -> LATIN SMALL LETTER Z
+ '\xa1' # 0xAA -> INVERTED EXCLAMATION MARK
+ '\xbf' # 0xAB -> INVERTED QUESTION MARK
+ '\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (Icelandic)
+ '\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE
+ '\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (Icelandic)
+ '\xae' # 0xAF -> REGISTERED SIGN
+ '\xa2' # 0xB0 -> CENT SIGN
+ '\xa3' # 0xB1 -> POUND SIGN
+ '\xa5' # 0xB2 -> YEN SIGN
+ '\xb7' # 0xB3 -> MIDDLE DOT
+ '\xa9' # 0xB4 -> COPYRIGHT SIGN
+ '@' # 0xB5 -> COMMERCIAL AT
+ '\xb6' # 0xB6 -> PILCROW SIGN
+ '\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER
+ '\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF
+ '\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS
+ '\xac' # 0xBA -> NOT SIGN
+ '|' # 0xBB -> VERTICAL LINE
+ '\u203e' # 0xBC -> OVERLINE
+ '\xa8' # 0xBD -> DIAERESIS
+ '\xb4' # 0xBE -> ACUTE ACCENT
+ '\xd7' # 0xBF -> MULTIPLICATION SIGN
+ '\xe4' # 0xC0 -> LATIN SMALL LETTER A WITH DIAERESIS
+ 'A' # 0xC1 -> LATIN CAPITAL LETTER A
+ 'B' # 0xC2 -> LATIN CAPITAL LETTER B
+ 'C' # 0xC3 -> LATIN CAPITAL LETTER C
+ 'D' # 0xC4 -> LATIN CAPITAL LETTER D
+ 'E' # 0xC5 -> LATIN CAPITAL LETTER E
+ 'F' # 0xC6 -> LATIN CAPITAL LETTER F
+ 'G' # 0xC7 -> LATIN CAPITAL LETTER G
+ 'H' # 0xC8 -> LATIN CAPITAL LETTER H
+ 'I' # 0xC9 -> LATIN CAPITAL LETTER I
+ '\xad' # 0xCA -> SOFT HYPHEN
+ '\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xa6' # 0xCC -> BROKEN BAR
+ '\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE
+ '\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE
+ '\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE
+ '\xfc' # 0xD0 -> LATIN SMALL LETTER U WITH DIAERESIS
+ 'J' # 0xD1 -> LATIN CAPITAL LETTER J
+ 'K' # 0xD2 -> LATIN CAPITAL LETTER K
+ 'L' # 0xD3 -> LATIN CAPITAL LETTER L
+ 'M' # 0xD4 -> LATIN CAPITAL LETTER M
+ 'N' # 0xD5 -> LATIN CAPITAL LETTER N
+ 'O' # 0xD6 -> LATIN CAPITAL LETTER O
+ 'P' # 0xD7 -> LATIN CAPITAL LETTER P
+ 'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
+ 'R' # 0xD9 -> LATIN CAPITAL LETTER R
+ '\xb9' # 0xDA -> SUPERSCRIPT ONE
+ '\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '}' # 0xDC -> RIGHT CURLY BRACKET
+ '\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE
+ '\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE
+ '\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
+ '\xd6' # 0xE0 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xf7' # 0xE1 -> DIVISION SIGN
+ 'S' # 0xE2 -> LATIN CAPITAL LETTER S
+ 'T' # 0xE3 -> LATIN CAPITAL LETTER T
+ 'U' # 0xE4 -> LATIN CAPITAL LETTER U
+ 'V' # 0xE5 -> LATIN CAPITAL LETTER V
+ 'W' # 0xE6 -> LATIN CAPITAL LETTER W
+ 'X' # 0xE7 -> LATIN CAPITAL LETTER X
+ 'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
+ '\xb2' # 0xEA -> SUPERSCRIPT TWO
+ '\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '\\' # 0xEC -> REVERSE SOLIDUS
+ '\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE
+ '\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE
+ '0' # 0xF0 -> DIGIT ZERO
+ '1' # 0xF1 -> DIGIT ONE
+ '2' # 0xF2 -> DIGIT TWO
+ '3' # 0xF3 -> DIGIT THREE
+ '4' # 0xF4 -> DIGIT FOUR
+ '5' # 0xF5 -> DIGIT FIVE
+ '6' # 0xF6 -> DIGIT SIX
+ '7' # 0xF7 -> DIGIT SEVEN
+ '8' # 0xF8 -> DIGIT EIGHT
+ '9' # 0xF9 -> DIGIT NINE
+ '\xb3' # 0xFA -> SUPERSCRIPT THREE
+ '\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ ']' # 0xFC -> RIGHT SQUARE BRACKET
+ '\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
+ '\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\x9f' # 0xFF -> APPLICATION PROGRAM COMMAND (APC)
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp424.py b/dist/lib/encodings/cp424.py
new file mode 100644
index 0000000..6753daf
--- /dev/null
+++ b/dist/lib/encodings/cp424.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp424 generated from 'MAPPINGS/VENDORS/MISC/CP424.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp424',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x9c' # 0x04 -> SELECT
+ '\t' # 0x05 -> HORIZONTAL TABULATION
+ '\x86' # 0x06 -> REQUIRED NEW LINE
+ '\x7f' # 0x07 -> DELETE
+ '\x97' # 0x08 -> GRAPHIC ESCAPE
+ '\x8d' # 0x09 -> SUPERSCRIPT
+ '\x8e' # 0x0A -> REPEAT
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x9d' # 0x14 -> RESTORE/ENABLE PRESENTATION
+ '\x85' # 0x15 -> NEW LINE
+ '\x08' # 0x16 -> BACKSPACE
+ '\x87' # 0x17 -> PROGRAM OPERATOR COMMUNICATION
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x92' # 0x1A -> UNIT BACK SPACE
+ '\x8f' # 0x1B -> CUSTOMER USE ONE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ '\x80' # 0x20 -> DIGIT SELECT
+ '\x81' # 0x21 -> START OF SIGNIFICANCE
+ '\x82' # 0x22 -> FIELD SEPARATOR
+ '\x83' # 0x23 -> WORD UNDERSCORE
+ '\x84' # 0x24 -> BYPASS OR INHIBIT PRESENTATION
+ '\n' # 0x25 -> LINE FEED
+ '\x17' # 0x26 -> END OF TRANSMISSION BLOCK
+ '\x1b' # 0x27 -> ESCAPE
+ '\x88' # 0x28 -> SET ATTRIBUTE
+ '\x89' # 0x29 -> START FIELD EXTENDED
+ '\x8a' # 0x2A -> SET MODE OR SWITCH
+ '\x8b' # 0x2B -> CONTROL SEQUENCE PREFIX
+ '\x8c' # 0x2C -> MODIFY FIELD ATTRIBUTE
+ '\x05' # 0x2D -> ENQUIRY
+ '\x06' # 0x2E -> ACKNOWLEDGE
+ '\x07' # 0x2F -> BELL
+ '\x90' # 0x30 ->
+ '\x91' # 0x31 ->
+ '\x16' # 0x32 -> SYNCHRONOUS IDLE
+ '\x93' # 0x33 -> INDEX RETURN
+ '\x94' # 0x34 -> PRESENTATION POSITION
+ '\x95' # 0x35 -> TRANSPARENT
+ '\x96' # 0x36 -> NUMERIC BACKSPACE
+ '\x04' # 0x37 -> END OF TRANSMISSION
+ '\x98' # 0x38 -> SUBSCRIPT
+ '\x99' # 0x39 -> INDENT TABULATION
+ '\x9a' # 0x3A -> REVERSE FORM FEED
+ '\x9b' # 0x3B -> CUSTOMER USE THREE
+ '\x14' # 0x3C -> DEVICE CONTROL FOUR
+ '\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE
+ '\x9e' # 0x3E ->
+ '\x1a' # 0x3F -> SUBSTITUTE
+ ' ' # 0x40 -> SPACE
+ '\u05d0' # 0x41 -> HEBREW LETTER ALEF
+ '\u05d1' # 0x42 -> HEBREW LETTER BET
+ '\u05d2' # 0x43 -> HEBREW LETTER GIMEL
+ '\u05d3' # 0x44 -> HEBREW LETTER DALET
+ '\u05d4' # 0x45 -> HEBREW LETTER HE
+ '\u05d5' # 0x46 -> HEBREW LETTER VAV
+ '\u05d6' # 0x47 -> HEBREW LETTER ZAYIN
+ '\u05d7' # 0x48 -> HEBREW LETTER HET
+ '\u05d8' # 0x49 -> HEBREW LETTER TET
+ '\xa2' # 0x4A -> CENT SIGN
+ '.' # 0x4B -> FULL STOP
+ '<' # 0x4C -> LESS-THAN SIGN
+ '(' # 0x4D -> LEFT PARENTHESIS
+ '+' # 0x4E -> PLUS SIGN
+ '|' # 0x4F -> VERTICAL LINE
+ '&' # 0x50 -> AMPERSAND
+ '\u05d9' # 0x51 -> HEBREW LETTER YOD
+ '\u05da' # 0x52 -> HEBREW LETTER FINAL KAF
+ '\u05db' # 0x53 -> HEBREW LETTER KAF
+ '\u05dc' # 0x54 -> HEBREW LETTER LAMED
+ '\u05dd' # 0x55 -> HEBREW LETTER FINAL MEM
+ '\u05de' # 0x56 -> HEBREW LETTER MEM
+ '\u05df' # 0x57 -> HEBREW LETTER FINAL NUN
+ '\u05e0' # 0x58 -> HEBREW LETTER NUN
+ '\u05e1' # 0x59 -> HEBREW LETTER SAMEKH
+ '!' # 0x5A -> EXCLAMATION MARK
+ '$' # 0x5B -> DOLLAR SIGN
+ '*' # 0x5C -> ASTERISK
+ ')' # 0x5D -> RIGHT PARENTHESIS
+ ';' # 0x5E -> SEMICOLON
+ '\xac' # 0x5F -> NOT SIGN
+ '-' # 0x60 -> HYPHEN-MINUS
+ '/' # 0x61 -> SOLIDUS
+ '\u05e2' # 0x62 -> HEBREW LETTER AYIN
+ '\u05e3' # 0x63 -> HEBREW LETTER FINAL PE
+ '\u05e4' # 0x64 -> HEBREW LETTER PE
+ '\u05e5' # 0x65 -> HEBREW LETTER FINAL TSADI
+ '\u05e6' # 0x66 -> HEBREW LETTER TSADI
+ '\u05e7' # 0x67 -> HEBREW LETTER QOF
+ '\u05e8' # 0x68 -> HEBREW LETTER RESH
+ '\u05e9' # 0x69 -> HEBREW LETTER SHIN
+ '\xa6' # 0x6A -> BROKEN BAR
+ ',' # 0x6B -> COMMA
+ '%' # 0x6C -> PERCENT SIGN
+ '_' # 0x6D -> LOW LINE
+ '>' # 0x6E -> GREATER-THAN SIGN
+ '?' # 0x6F -> QUESTION MARK
+ '\ufffe' # 0x70 -> UNDEFINED
+ '\u05ea' # 0x71 -> HEBREW LETTER TAV
+ '\ufffe' # 0x72 -> UNDEFINED
+ '\ufffe' # 0x73 -> UNDEFINED
+ '\xa0' # 0x74 -> NO-BREAK SPACE
+ '\ufffe' # 0x75 -> UNDEFINED
+ '\ufffe' # 0x76 -> UNDEFINED
+ '\ufffe' # 0x77 -> UNDEFINED
+ '\u2017' # 0x78 -> DOUBLE LOW LINE
+ '`' # 0x79 -> GRAVE ACCENT
+ ':' # 0x7A -> COLON
+ '#' # 0x7B -> NUMBER SIGN
+ '@' # 0x7C -> COMMERCIAL AT
+ "'" # 0x7D -> APOSTROPHE
+ '=' # 0x7E -> EQUALS SIGN
+ '"' # 0x7F -> QUOTATION MARK
+ '\ufffe' # 0x80 -> UNDEFINED
+ 'a' # 0x81 -> LATIN SMALL LETTER A
+ 'b' # 0x82 -> LATIN SMALL LETTER B
+ 'c' # 0x83 -> LATIN SMALL LETTER C
+ 'd' # 0x84 -> LATIN SMALL LETTER D
+ 'e' # 0x85 -> LATIN SMALL LETTER E
+ 'f' # 0x86 -> LATIN SMALL LETTER F
+ 'g' # 0x87 -> LATIN SMALL LETTER G
+ 'h' # 0x88 -> LATIN SMALL LETTER H
+ 'i' # 0x89 -> LATIN SMALL LETTER I
+ '\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\ufffe' # 0x8C -> UNDEFINED
+ '\ufffe' # 0x8D -> UNDEFINED
+ '\ufffe' # 0x8E -> UNDEFINED
+ '\xb1' # 0x8F -> PLUS-MINUS SIGN
+ '\xb0' # 0x90 -> DEGREE SIGN
+ 'j' # 0x91 -> LATIN SMALL LETTER J
+ 'k' # 0x92 -> LATIN SMALL LETTER K
+ 'l' # 0x93 -> LATIN SMALL LETTER L
+ 'm' # 0x94 -> LATIN SMALL LETTER M
+ 'n' # 0x95 -> LATIN SMALL LETTER N
+ 'o' # 0x96 -> LATIN SMALL LETTER O
+ 'p' # 0x97 -> LATIN SMALL LETTER P
+ 'q' # 0x98 -> LATIN SMALL LETTER Q
+ 'r' # 0x99 -> LATIN SMALL LETTER R
+ '\ufffe' # 0x9A -> UNDEFINED
+ '\ufffe' # 0x9B -> UNDEFINED
+ '\ufffe' # 0x9C -> UNDEFINED
+ '\xb8' # 0x9D -> CEDILLA
+ '\ufffe' # 0x9E -> UNDEFINED
+ '\xa4' # 0x9F -> CURRENCY SIGN
+ '\xb5' # 0xA0 -> MICRO SIGN
+ '~' # 0xA1 -> TILDE
+ 's' # 0xA2 -> LATIN SMALL LETTER S
+ 't' # 0xA3 -> LATIN SMALL LETTER T
+ 'u' # 0xA4 -> LATIN SMALL LETTER U
+ 'v' # 0xA5 -> LATIN SMALL LETTER V
+ 'w' # 0xA6 -> LATIN SMALL LETTER W
+ 'x' # 0xA7 -> LATIN SMALL LETTER X
+ 'y' # 0xA8 -> LATIN SMALL LETTER Y
+ 'z' # 0xA9 -> LATIN SMALL LETTER Z
+ '\ufffe' # 0xAA -> UNDEFINED
+ '\ufffe' # 0xAB -> UNDEFINED
+ '\ufffe' # 0xAC -> UNDEFINED
+ '\ufffe' # 0xAD -> UNDEFINED
+ '\ufffe' # 0xAE -> UNDEFINED
+ '\xae' # 0xAF -> REGISTERED SIGN
+ '^' # 0xB0 -> CIRCUMFLEX ACCENT
+ '\xa3' # 0xB1 -> POUND SIGN
+ '\xa5' # 0xB2 -> YEN SIGN
+ '\xb7' # 0xB3 -> MIDDLE DOT
+ '\xa9' # 0xB4 -> COPYRIGHT SIGN
+ '\xa7' # 0xB5 -> SECTION SIGN
+ '\xb6' # 0xB6 -> PILCROW SIGN
+ '\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER
+ '\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF
+ '\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS
+ '[' # 0xBA -> LEFT SQUARE BRACKET
+ ']' # 0xBB -> RIGHT SQUARE BRACKET
+ '\xaf' # 0xBC -> MACRON
+ '\xa8' # 0xBD -> DIAERESIS
+ '\xb4' # 0xBE -> ACUTE ACCENT
+ '\xd7' # 0xBF -> MULTIPLICATION SIGN
+ '{' # 0xC0 -> LEFT CURLY BRACKET
+ 'A' # 0xC1 -> LATIN CAPITAL LETTER A
+ 'B' # 0xC2 -> LATIN CAPITAL LETTER B
+ 'C' # 0xC3 -> LATIN CAPITAL LETTER C
+ 'D' # 0xC4 -> LATIN CAPITAL LETTER D
+ 'E' # 0xC5 -> LATIN CAPITAL LETTER E
+ 'F' # 0xC6 -> LATIN CAPITAL LETTER F
+ 'G' # 0xC7 -> LATIN CAPITAL LETTER G
+ 'H' # 0xC8 -> LATIN CAPITAL LETTER H
+ 'I' # 0xC9 -> LATIN CAPITAL LETTER I
+ '\xad' # 0xCA -> SOFT HYPHEN
+ '\ufffe' # 0xCB -> UNDEFINED
+ '\ufffe' # 0xCC -> UNDEFINED
+ '\ufffe' # 0xCD -> UNDEFINED
+ '\ufffe' # 0xCE -> UNDEFINED
+ '\ufffe' # 0xCF -> UNDEFINED
+ '}' # 0xD0 -> RIGHT CURLY BRACKET
+ 'J' # 0xD1 -> LATIN CAPITAL LETTER J
+ 'K' # 0xD2 -> LATIN CAPITAL LETTER K
+ 'L' # 0xD3 -> LATIN CAPITAL LETTER L
+ 'M' # 0xD4 -> LATIN CAPITAL LETTER M
+ 'N' # 0xD5 -> LATIN CAPITAL LETTER N
+ 'O' # 0xD6 -> LATIN CAPITAL LETTER O
+ 'P' # 0xD7 -> LATIN CAPITAL LETTER P
+ 'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
+ 'R' # 0xD9 -> LATIN CAPITAL LETTER R
+ '\xb9' # 0xDA -> SUPERSCRIPT ONE
+ '\ufffe' # 0xDB -> UNDEFINED
+ '\ufffe' # 0xDC -> UNDEFINED
+ '\ufffe' # 0xDD -> UNDEFINED
+ '\ufffe' # 0xDE -> UNDEFINED
+ '\ufffe' # 0xDF -> UNDEFINED
+ '\\' # 0xE0 -> REVERSE SOLIDUS
+ '\xf7' # 0xE1 -> DIVISION SIGN
+ 'S' # 0xE2 -> LATIN CAPITAL LETTER S
+ 'T' # 0xE3 -> LATIN CAPITAL LETTER T
+ 'U' # 0xE4 -> LATIN CAPITAL LETTER U
+ 'V' # 0xE5 -> LATIN CAPITAL LETTER V
+ 'W' # 0xE6 -> LATIN CAPITAL LETTER W
+ 'X' # 0xE7 -> LATIN CAPITAL LETTER X
+ 'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
+ '\xb2' # 0xEA -> SUPERSCRIPT TWO
+ '\ufffe' # 0xEB -> UNDEFINED
+ '\ufffe' # 0xEC -> UNDEFINED
+ '\ufffe' # 0xED -> UNDEFINED
+ '\ufffe' # 0xEE -> UNDEFINED
+ '\ufffe' # 0xEF -> UNDEFINED
+ '0' # 0xF0 -> DIGIT ZERO
+ '1' # 0xF1 -> DIGIT ONE
+ '2' # 0xF2 -> DIGIT TWO
+ '3' # 0xF3 -> DIGIT THREE
+ '4' # 0xF4 -> DIGIT FOUR
+ '5' # 0xF5 -> DIGIT FIVE
+ '6' # 0xF6 -> DIGIT SIX
+ '7' # 0xF7 -> DIGIT SEVEN
+ '8' # 0xF8 -> DIGIT EIGHT
+ '9' # 0xF9 -> DIGIT NINE
+ '\xb3' # 0xFA -> SUPERSCRIPT THREE
+ '\ufffe' # 0xFB -> UNDEFINED
+ '\ufffe' # 0xFC -> UNDEFINED
+ '\ufffe' # 0xFD -> UNDEFINED
+ '\ufffe' # 0xFE -> UNDEFINED
+ '\x9f' # 0xFF -> EIGHT ONES
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp437.py b/dist/lib/encodings/cp437.py
new file mode 100644
index 0000000..b6c75e2
--- /dev/null
+++ b/dist/lib/encodings/cp437.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec cp437 generated from 'VENDORS/MICSFT/PC/CP437.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp437',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
+ 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
+ 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
+ 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
+ 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
+ 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
+ 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
+ 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x00a2, # CENT SIGN
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00a5, # YEN SIGN
+ 0x009e: 0x20a7, # PESETA SIGN
+ 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
+ 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
+ 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
+ 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
+ 0x00a8: 0x00bf, # INVERTED QUESTION MARK
+ 0x00a9: 0x2310, # REVERSED NOT SIGN
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x258c, # LEFT HALF BLOCK
+ 0x00de: 0x2590, # RIGHT HALF BLOCK
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
+ 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
+ 0x00e3: 0x03c0, # GREEK SMALL LETTER PI
+ 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
+ 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
+ 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
+ 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
+ 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
+ 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
+ 0x00ec: 0x221e, # INFINITY
+ 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
+ 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
+ 0x00ef: 0x2229, # INTERSECTION
+ 0x00f0: 0x2261, # IDENTICAL TO
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
+ 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
+ 0x00f4: 0x2320, # TOP HALF INTEGRAL
+ 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x2248, # ALMOST EQUAL TO
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x2219, # BULLET OPERATOR
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x221a, # SQUARE ROOT
+ 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x0000 -> NULL
+ '\x01' # 0x0001 -> START OF HEADING
+ '\x02' # 0x0002 -> START OF TEXT
+ '\x03' # 0x0003 -> END OF TEXT
+ '\x04' # 0x0004 -> END OF TRANSMISSION
+ '\x05' # 0x0005 -> ENQUIRY
+ '\x06' # 0x0006 -> ACKNOWLEDGE
+ '\x07' # 0x0007 -> BELL
+ '\x08' # 0x0008 -> BACKSPACE
+ '\t' # 0x0009 -> HORIZONTAL TABULATION
+ '\n' # 0x000a -> LINE FEED
+ '\x0b' # 0x000b -> VERTICAL TABULATION
+ '\x0c' # 0x000c -> FORM FEED
+ '\r' # 0x000d -> CARRIAGE RETURN
+ '\x0e' # 0x000e -> SHIFT OUT
+ '\x0f' # 0x000f -> SHIFT IN
+ '\x10' # 0x0010 -> DATA LINK ESCAPE
+ '\x11' # 0x0011 -> DEVICE CONTROL ONE
+ '\x12' # 0x0012 -> DEVICE CONTROL TWO
+ '\x13' # 0x0013 -> DEVICE CONTROL THREE
+ '\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ '\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ '\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x0018 -> CANCEL
+ '\x19' # 0x0019 -> END OF MEDIUM
+ '\x1a' # 0x001a -> SUBSTITUTE
+ '\x1b' # 0x001b -> ESCAPE
+ '\x1c' # 0x001c -> FILE SEPARATOR
+ '\x1d' # 0x001d -> GROUP SEPARATOR
+ '\x1e' # 0x001e -> RECORD SEPARATOR
+ '\x1f' # 0x001f -> UNIT SEPARATOR
+ ' ' # 0x0020 -> SPACE
+ '!' # 0x0021 -> EXCLAMATION MARK
+ '"' # 0x0022 -> QUOTATION MARK
+ '#' # 0x0023 -> NUMBER SIGN
+ '$' # 0x0024 -> DOLLAR SIGN
+ '%' # 0x0025 -> PERCENT SIGN
+ '&' # 0x0026 -> AMPERSAND
+ "'" # 0x0027 -> APOSTROPHE
+ '(' # 0x0028 -> LEFT PARENTHESIS
+ ')' # 0x0029 -> RIGHT PARENTHESIS
+ '*' # 0x002a -> ASTERISK
+ '+' # 0x002b -> PLUS SIGN
+ ',' # 0x002c -> COMMA
+ '-' # 0x002d -> HYPHEN-MINUS
+ '.' # 0x002e -> FULL STOP
+ '/' # 0x002f -> SOLIDUS
+ '0' # 0x0030 -> DIGIT ZERO
+ '1' # 0x0031 -> DIGIT ONE
+ '2' # 0x0032 -> DIGIT TWO
+ '3' # 0x0033 -> DIGIT THREE
+ '4' # 0x0034 -> DIGIT FOUR
+ '5' # 0x0035 -> DIGIT FIVE
+ '6' # 0x0036 -> DIGIT SIX
+ '7' # 0x0037 -> DIGIT SEVEN
+ '8' # 0x0038 -> DIGIT EIGHT
+ '9' # 0x0039 -> DIGIT NINE
+ ':' # 0x003a -> COLON
+ ';' # 0x003b -> SEMICOLON
+ '<' # 0x003c -> LESS-THAN SIGN
+ '=' # 0x003d -> EQUALS SIGN
+ '>' # 0x003e -> GREATER-THAN SIGN
+ '?' # 0x003f -> QUESTION MARK
+ '@' # 0x0040 -> COMMERCIAL AT
+ 'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ 'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ 'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ 'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ 'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ 'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ 'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ 'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ 'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ 'J' # 0x004a -> LATIN CAPITAL LETTER J
+ 'K' # 0x004b -> LATIN CAPITAL LETTER K
+ 'L' # 0x004c -> LATIN CAPITAL LETTER L
+ 'M' # 0x004d -> LATIN CAPITAL LETTER M
+ 'N' # 0x004e -> LATIN CAPITAL LETTER N
+ 'O' # 0x004f -> LATIN CAPITAL LETTER O
+ 'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ 'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ 'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ 'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ 'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ 'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ 'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ '[' # 0x005b -> LEFT SQUARE BRACKET
+ '\\' # 0x005c -> REVERSE SOLIDUS
+ ']' # 0x005d -> RIGHT SQUARE BRACKET
+ '^' # 0x005e -> CIRCUMFLEX ACCENT
+ '_' # 0x005f -> LOW LINE
+ '`' # 0x0060 -> GRAVE ACCENT
+ 'a' # 0x0061 -> LATIN SMALL LETTER A
+ 'b' # 0x0062 -> LATIN SMALL LETTER B
+ 'c' # 0x0063 -> LATIN SMALL LETTER C
+ 'd' # 0x0064 -> LATIN SMALL LETTER D
+ 'e' # 0x0065 -> LATIN SMALL LETTER E
+ 'f' # 0x0066 -> LATIN SMALL LETTER F
+ 'g' # 0x0067 -> LATIN SMALL LETTER G
+ 'h' # 0x0068 -> LATIN SMALL LETTER H
+ 'i' # 0x0069 -> LATIN SMALL LETTER I
+ 'j' # 0x006a -> LATIN SMALL LETTER J
+ 'k' # 0x006b -> LATIN SMALL LETTER K
+ 'l' # 0x006c -> LATIN SMALL LETTER L
+ 'm' # 0x006d -> LATIN SMALL LETTER M
+ 'n' # 0x006e -> LATIN SMALL LETTER N
+ 'o' # 0x006f -> LATIN SMALL LETTER O
+ 'p' # 0x0070 -> LATIN SMALL LETTER P
+ 'q' # 0x0071 -> LATIN SMALL LETTER Q
+ 'r' # 0x0072 -> LATIN SMALL LETTER R
+ 's' # 0x0073 -> LATIN SMALL LETTER S
+ 't' # 0x0074 -> LATIN SMALL LETTER T
+ 'u' # 0x0075 -> LATIN SMALL LETTER U
+ 'v' # 0x0076 -> LATIN SMALL LETTER V
+ 'w' # 0x0077 -> LATIN SMALL LETTER W
+ 'x' # 0x0078 -> LATIN SMALL LETTER X
+ 'y' # 0x0079 -> LATIN SMALL LETTER Y
+ 'z' # 0x007a -> LATIN SMALL LETTER Z
+ '{' # 0x007b -> LEFT CURLY BRACKET
+ '|' # 0x007c -> VERTICAL LINE
+ '}' # 0x007d -> RIGHT CURLY BRACKET
+ '~' # 0x007e -> TILDE
+ '\x7f' # 0x007f -> DELETE
+ '\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
+ '\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE
+ '\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
+ '\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
+ '\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE
+ '\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+ '\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS
+ '\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xa2' # 0x009b -> CENT SIGN
+ '\xa3' # 0x009c -> POUND SIGN
+ '\xa5' # 0x009d -> YEN SIGN
+ '\u20a7' # 0x009e -> PESETA SIGN
+ '\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
+ '\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+ '\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ '\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ '\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+ '\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+ '\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR
+ '\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR
+ '\xbf' # 0x00a8 -> INVERTED QUESTION MARK
+ '\u2310' # 0x00a9 -> REVERSED NOT SIGN
+ '\xac' # 0x00aa -> NOT SIGN
+ '\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ '\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ '\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
+ '\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u2591' # 0x00b0 -> LIGHT SHADE
+ '\u2592' # 0x00b1 -> MEDIUM SHADE
+ '\u2593' # 0x00b2 -> DARK SHADE
+ '\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ '\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ '\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ '\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ '\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ '\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ '\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ '\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ '\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ '\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ '\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ '\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ '\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ '\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ '\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ '\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ '\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ '\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ '\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ '\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ '\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ '\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ '\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ '\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ '\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ '\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ '\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ '\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ '\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ '\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ '\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ '\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ '\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ '\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ '\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ '\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ '\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ '\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ '\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ '\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ '\u2588' # 0x00db -> FULL BLOCK
+ '\u2584' # 0x00dc -> LOWER HALF BLOCK
+ '\u258c' # 0x00dd -> LEFT HALF BLOCK
+ '\u2590' # 0x00de -> RIGHT HALF BLOCK
+ '\u2580' # 0x00df -> UPPER HALF BLOCK
+ '\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA
+ '\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
+ '\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA
+ '\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI
+ '\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA
+ '\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA
+ '\xb5' # 0x00e6 -> MICRO SIGN
+ '\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU
+ '\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI
+ '\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA
+ '\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA
+ '\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA
+ '\u221e' # 0x00ec -> INFINITY
+ '\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI
+ '\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON
+ '\u2229' # 0x00ef -> INTERSECTION
+ '\u2261' # 0x00f0 -> IDENTICAL TO
+ '\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ '\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
+ '\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
+ '\u2320' # 0x00f4 -> TOP HALF INTEGRAL
+ '\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL
+ '\xf7' # 0x00f6 -> DIVISION SIGN
+ '\u2248' # 0x00f7 -> ALMOST EQUAL TO
+ '\xb0' # 0x00f8 -> DEGREE SIGN
+ '\u2219' # 0x00f9 -> BULLET OPERATOR
+ '\xb7' # 0x00fa -> MIDDLE DOT
+ '\u221a' # 0x00fb -> SQUARE ROOT
+ '\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+ '\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ '\u25a0' # 0x00fe -> BLACK SQUARE
+ '\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
+ 0x00a2: 0x009b, # CENT SIGN
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00a5: 0x009d, # YEN SIGN
+ 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00bf: 0x00a8, # INVERTED QUESTION MARK
+ 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
+ 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
+ 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
+ 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
+ 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE
+ 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
+ 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
+ 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA
+ 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA
+ 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA
+ 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI
+ 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA
+ 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA
+ 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA
+ 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON
+ 0x03c0: 0x00e3, # GREEK SMALL LETTER PI
+ 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA
+ 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU
+ 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI
+ 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x20a7: 0x009e, # PESETA SIGN
+ 0x2219: 0x00f9, # BULLET OPERATOR
+ 0x221a: 0x00fb, # SQUARE ROOT
+ 0x221e: 0x00ec, # INFINITY
+ 0x2229: 0x00ef, # INTERSECTION
+ 0x2248: 0x00f7, # ALMOST EQUAL TO
+ 0x2261: 0x00f0, # IDENTICAL TO
+ 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
+ 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
+ 0x2310: 0x00a9, # REVERSED NOT SIGN
+ 0x2320: 0x00f4, # TOP HALF INTEGRAL
+ 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x258c: 0x00dd, # LEFT HALF BLOCK
+ 0x2590: 0x00de, # RIGHT HALF BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/dist/lib/encodings/cp500.py b/dist/lib/encodings/cp500.py
new file mode 100644
index 0000000..5f61535
--- /dev/null
+++ b/dist/lib/encodings/cp500.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp500 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp500',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x9c' # 0x04 -> CONTROL
+ '\t' # 0x05 -> HORIZONTAL TABULATION
+ '\x86' # 0x06 -> CONTROL
+ '\x7f' # 0x07 -> DELETE
+ '\x97' # 0x08 -> CONTROL
+ '\x8d' # 0x09 -> CONTROL
+ '\x8e' # 0x0A -> CONTROL
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x9d' # 0x14 -> CONTROL
+ '\x85' # 0x15 -> CONTROL
+ '\x08' # 0x16 -> BACKSPACE
+ '\x87' # 0x17 -> CONTROL
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x92' # 0x1A -> CONTROL
+ '\x8f' # 0x1B -> CONTROL
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ '\x80' # 0x20 -> CONTROL
+ '\x81' # 0x21 -> CONTROL
+ '\x82' # 0x22 -> CONTROL
+ '\x83' # 0x23 -> CONTROL
+ '\x84' # 0x24 -> CONTROL
+ '\n' # 0x25 -> LINE FEED
+ '\x17' # 0x26 -> END OF TRANSMISSION BLOCK
+ '\x1b' # 0x27 -> ESCAPE
+ '\x88' # 0x28 -> CONTROL
+ '\x89' # 0x29 -> CONTROL
+ '\x8a' # 0x2A -> CONTROL
+ '\x8b' # 0x2B -> CONTROL
+ '\x8c' # 0x2C -> CONTROL
+ '\x05' # 0x2D -> ENQUIRY
+ '\x06' # 0x2E -> ACKNOWLEDGE
+ '\x07' # 0x2F -> BELL
+ '\x90' # 0x30 -> CONTROL
+ '\x91' # 0x31 -> CONTROL
+ '\x16' # 0x32 -> SYNCHRONOUS IDLE
+ '\x93' # 0x33 -> CONTROL
+ '\x94' # 0x34 -> CONTROL
+ '\x95' # 0x35 -> CONTROL
+ '\x96' # 0x36 -> CONTROL
+ '\x04' # 0x37 -> END OF TRANSMISSION
+ '\x98' # 0x38 -> CONTROL
+ '\x99' # 0x39 -> CONTROL
+ '\x9a' # 0x3A -> CONTROL
+ '\x9b' # 0x3B -> CONTROL
+ '\x14' # 0x3C -> DEVICE CONTROL FOUR
+ '\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE
+ '\x9e' # 0x3E -> CONTROL
+ '\x1a' # 0x3F -> SUBSTITUTE
+ ' ' # 0x40 -> SPACE
+ '\xa0' # 0x41 -> NO-BREAK SPACE
+ '\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE
+ '\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE
+ '[' # 0x4A -> LEFT SQUARE BRACKET
+ '.' # 0x4B -> FULL STOP
+ '<' # 0x4C -> LESS-THAN SIGN
+ '(' # 0x4D -> LEFT PARENTHESIS
+ '+' # 0x4E -> PLUS SIGN
+ '!' # 0x4F -> EXCLAMATION MARK
+ '&' # 0x50 -> AMPERSAND
+ '\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE
+ '\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE
+ '\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE
+ '\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
+ ']' # 0x5A -> RIGHT SQUARE BRACKET
+ '$' # 0x5B -> DOLLAR SIGN
+ '*' # 0x5C -> ASTERISK
+ ')' # 0x5D -> RIGHT PARENTHESIS
+ ';' # 0x5E -> SEMICOLON
+ '^' # 0x5F -> CIRCUMFLEX ACCENT
+ '-' # 0x60 -> HYPHEN-MINUS
+ '/' # 0x61 -> SOLIDUS
+ '\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ '\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
+ '\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE
+ '\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE
+ '\xa6' # 0x6A -> BROKEN BAR
+ ',' # 0x6B -> COMMA
+ '%' # 0x6C -> PERCENT SIGN
+ '_' # 0x6D -> LOW LINE
+ '>' # 0x6E -> GREATER-THAN SIGN
+ '?' # 0x6F -> QUESTION MARK
+ '\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE
+ '\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ '\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ '\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
+ '\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ '\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ '\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
+ '`' # 0x79 -> GRAVE ACCENT
+ ':' # 0x7A -> COLON
+ '#' # 0x7B -> NUMBER SIGN
+ '@' # 0x7C -> COMMERCIAL AT
+ "'" # 0x7D -> APOSTROPHE
+ '=' # 0x7E -> EQUALS SIGN
+ '"' # 0x7F -> QUOTATION MARK
+ '\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE
+ 'a' # 0x81 -> LATIN SMALL LETTER A
+ 'b' # 0x82 -> LATIN SMALL LETTER B
+ 'c' # 0x83 -> LATIN SMALL LETTER C
+ 'd' # 0x84 -> LATIN SMALL LETTER D
+ 'e' # 0x85 -> LATIN SMALL LETTER E
+ 'f' # 0x86 -> LATIN SMALL LETTER F
+ 'g' # 0x87 -> LATIN SMALL LETTER G
+ 'h' # 0x88 -> LATIN SMALL LETTER H
+ 'i' # 0x89 -> LATIN SMALL LETTER I
+ '\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC)
+ '\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE
+ '\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC)
+ '\xb1' # 0x8F -> PLUS-MINUS SIGN
+ '\xb0' # 0x90 -> DEGREE SIGN
+ 'j' # 0x91 -> LATIN SMALL LETTER J
+ 'k' # 0x92 -> LATIN SMALL LETTER K
+ 'l' # 0x93 -> LATIN SMALL LETTER L
+ 'm' # 0x94 -> LATIN SMALL LETTER M
+ 'n' # 0x95 -> LATIN SMALL LETTER N
+ 'o' # 0x96 -> LATIN SMALL LETTER O
+ 'p' # 0x97 -> LATIN SMALL LETTER P
+ 'q' # 0x98 -> LATIN SMALL LETTER Q
+ 'r' # 0x99 -> LATIN SMALL LETTER R
+ '\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR
+ '\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR
+ '\xe6' # 0x9C -> LATIN SMALL LIGATURE AE
+ '\xb8' # 0x9D -> CEDILLA
+ '\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE
+ '\xa4' # 0x9F -> CURRENCY SIGN
+ '\xb5' # 0xA0 -> MICRO SIGN
+ '~' # 0xA1 -> TILDE
+ 's' # 0xA2 -> LATIN SMALL LETTER S
+ 't' # 0xA3 -> LATIN SMALL LETTER T
+ 'u' # 0xA4 -> LATIN SMALL LETTER U
+ 'v' # 0xA5 -> LATIN SMALL LETTER V
+ 'w' # 0xA6 -> LATIN SMALL LETTER W
+ 'x' # 0xA7 -> LATIN SMALL LETTER X
+ 'y' # 0xA8 -> LATIN SMALL LETTER Y
+ 'z' # 0xA9 -> LATIN SMALL LETTER Z
+ '\xa1' # 0xAA -> INVERTED EXCLAMATION MARK
+ '\xbf' # 0xAB -> INVERTED QUESTION MARK
+ '\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC)
+ '\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE
+ '\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC)
+ '\xae' # 0xAF -> REGISTERED SIGN
+ '\xa2' # 0xB0 -> CENT SIGN
+ '\xa3' # 0xB1 -> POUND SIGN
+ '\xa5' # 0xB2 -> YEN SIGN
+ '\xb7' # 0xB3 -> MIDDLE DOT
+ '\xa9' # 0xB4 -> COPYRIGHT SIGN
+ '\xa7' # 0xB5 -> SECTION SIGN
+ '\xb6' # 0xB6 -> PILCROW SIGN
+ '\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER
+ '\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF
+ '\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS
+ '\xac' # 0xBA -> NOT SIGN
+ '|' # 0xBB -> VERTICAL LINE
+ '\xaf' # 0xBC -> MACRON
+ '\xa8' # 0xBD -> DIAERESIS
+ '\xb4' # 0xBE -> ACUTE ACCENT
+ '\xd7' # 0xBF -> MULTIPLICATION SIGN
+ '{' # 0xC0 -> LEFT CURLY BRACKET
+ 'A' # 0xC1 -> LATIN CAPITAL LETTER A
+ 'B' # 0xC2 -> LATIN CAPITAL LETTER B
+ 'C' # 0xC3 -> LATIN CAPITAL LETTER C
+ 'D' # 0xC4 -> LATIN CAPITAL LETTER D
+ 'E' # 0xC5 -> LATIN CAPITAL LETTER E
+ 'F' # 0xC6 -> LATIN CAPITAL LETTER F
+ 'G' # 0xC7 -> LATIN CAPITAL LETTER G
+ 'H' # 0xC8 -> LATIN CAPITAL LETTER H
+ 'I' # 0xC9 -> LATIN CAPITAL LETTER I
+ '\xad' # 0xCA -> SOFT HYPHEN
+ '\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE
+ '\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE
+ '\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE
+ '}' # 0xD0 -> RIGHT CURLY BRACKET
+ 'J' # 0xD1 -> LATIN CAPITAL LETTER J
+ 'K' # 0xD2 -> LATIN CAPITAL LETTER K
+ 'L' # 0xD3 -> LATIN CAPITAL LETTER L
+ 'M' # 0xD4 -> LATIN CAPITAL LETTER M
+ 'N' # 0xD5 -> LATIN CAPITAL LETTER N
+ 'O' # 0xD6 -> LATIN CAPITAL LETTER O
+ 'P' # 0xD7 -> LATIN CAPITAL LETTER P
+ 'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
+ 'R' # 0xD9 -> LATIN CAPITAL LETTER R
+ '\xb9' # 0xDA -> SUPERSCRIPT ONE
+ '\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE
+ '\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE
+ '\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
+ '\\' # 0xE0 -> REVERSE SOLIDUS
+ '\xf7' # 0xE1 -> DIVISION SIGN
+ 'S' # 0xE2 -> LATIN CAPITAL LETTER S
+ 'T' # 0xE3 -> LATIN CAPITAL LETTER T
+ 'U' # 0xE4 -> LATIN CAPITAL LETTER U
+ 'V' # 0xE5 -> LATIN CAPITAL LETTER V
+ 'W' # 0xE6 -> LATIN CAPITAL LETTER W
+ 'X' # 0xE7 -> LATIN CAPITAL LETTER X
+ 'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
+ '\xb2' # 0xEA -> SUPERSCRIPT TWO
+ '\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE
+ '\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE
+ '0' # 0xF0 -> DIGIT ZERO
+ '1' # 0xF1 -> DIGIT ONE
+ '2' # 0xF2 -> DIGIT TWO
+ '3' # 0xF3 -> DIGIT THREE
+ '4' # 0xF4 -> DIGIT FOUR
+ '5' # 0xF5 -> DIGIT FIVE
+ '6' # 0xF6 -> DIGIT SIX
+ '7' # 0xF7 -> DIGIT SEVEN
+ '8' # 0xF8 -> DIGIT EIGHT
+ '9' # 0xF9 -> DIGIT NINE
+ '\xb3' # 0xFA -> SUPERSCRIPT THREE
+ '\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ '\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
+ '\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\x9f' # 0xFF -> CONTROL
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp720.py b/dist/lib/encodings/cp720.py
new file mode 100644
index 0000000..96d6096
--- /dev/null
+++ b/dist/lib/encodings/cp720.py
@@ -0,0 +1,309 @@
+"""Python Character Mapping Codec cp720 generated on Windows:
+Vista 6.0.6002 SP2 Multiprocessor Free with the command:
+ python Tools/unicode/genwincodec.py 720
+"""#"
+
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp720',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> CONTROL CHARACTER
+ '\x01' # 0x01 -> CONTROL CHARACTER
+ '\x02' # 0x02 -> CONTROL CHARACTER
+ '\x03' # 0x03 -> CONTROL CHARACTER
+ '\x04' # 0x04 -> CONTROL CHARACTER
+ '\x05' # 0x05 -> CONTROL CHARACTER
+ '\x06' # 0x06 -> CONTROL CHARACTER
+ '\x07' # 0x07 -> CONTROL CHARACTER
+ '\x08' # 0x08 -> CONTROL CHARACTER
+ '\t' # 0x09 -> CONTROL CHARACTER
+ '\n' # 0x0A -> CONTROL CHARACTER
+ '\x0b' # 0x0B -> CONTROL CHARACTER
+ '\x0c' # 0x0C -> CONTROL CHARACTER
+ '\r' # 0x0D -> CONTROL CHARACTER
+ '\x0e' # 0x0E -> CONTROL CHARACTER
+ '\x0f' # 0x0F -> CONTROL CHARACTER
+ '\x10' # 0x10 -> CONTROL CHARACTER
+ '\x11' # 0x11 -> CONTROL CHARACTER
+ '\x12' # 0x12 -> CONTROL CHARACTER
+ '\x13' # 0x13 -> CONTROL CHARACTER
+ '\x14' # 0x14 -> CONTROL CHARACTER
+ '\x15' # 0x15 -> CONTROL CHARACTER
+ '\x16' # 0x16 -> CONTROL CHARACTER
+ '\x17' # 0x17 -> CONTROL CHARACTER
+ '\x18' # 0x18 -> CONTROL CHARACTER
+ '\x19' # 0x19 -> CONTROL CHARACTER
+ '\x1a' # 0x1A -> CONTROL CHARACTER
+ '\x1b' # 0x1B -> CONTROL CHARACTER
+ '\x1c' # 0x1C -> CONTROL CHARACTER
+ '\x1d' # 0x1D -> CONTROL CHARACTER
+ '\x1e' # 0x1E -> CONTROL CHARACTER
+ '\x1f' # 0x1F -> CONTROL CHARACTER
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> CONTROL CHARACTER
+ '\x80'
+ '\x81'
+ '\xe9' # 0x82 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xe2' # 0x83 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\x84'
+ '\xe0' # 0x85 -> LATIN SMALL LETTER A WITH GRAVE
+ '\x86'
+ '\xe7' # 0x87 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xea' # 0x88 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xeb' # 0x89 -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\xe8' # 0x8A -> LATIN SMALL LETTER E WITH GRAVE
+ '\xef' # 0x8B -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\xee' # 0x8C -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\x8d'
+ '\x8e'
+ '\x8f'
+ '\x90'
+ '\u0651' # 0x91 -> ARABIC SHADDA
+ '\u0652' # 0x92 -> ARABIC SUKUN
+ '\xf4' # 0x93 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xa4' # 0x94 -> CURRENCY SIGN
+ '\u0640' # 0x95 -> ARABIC TATWEEL
+ '\xfb' # 0x96 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\xf9' # 0x97 -> LATIN SMALL LETTER U WITH GRAVE
+ '\u0621' # 0x98 -> ARABIC LETTER HAMZA
+ '\u0622' # 0x99 -> ARABIC LETTER ALEF WITH MADDA ABOVE
+ '\u0623' # 0x9A -> ARABIC LETTER ALEF WITH HAMZA ABOVE
+ '\u0624' # 0x9B -> ARABIC LETTER WAW WITH HAMZA ABOVE
+ '\xa3' # 0x9C -> POUND SIGN
+ '\u0625' # 0x9D -> ARABIC LETTER ALEF WITH HAMZA BELOW
+ '\u0626' # 0x9E -> ARABIC LETTER YEH WITH HAMZA ABOVE
+ '\u0627' # 0x9F -> ARABIC LETTER ALEF
+ '\u0628' # 0xA0 -> ARABIC LETTER BEH
+ '\u0629' # 0xA1 -> ARABIC LETTER TEH MARBUTA
+ '\u062a' # 0xA2 -> ARABIC LETTER TEH
+ '\u062b' # 0xA3 -> ARABIC LETTER THEH
+ '\u062c' # 0xA4 -> ARABIC LETTER JEEM
+ '\u062d' # 0xA5 -> ARABIC LETTER HAH
+ '\u062e' # 0xA6 -> ARABIC LETTER KHAH
+ '\u062f' # 0xA7 -> ARABIC LETTER DAL
+ '\u0630' # 0xA8 -> ARABIC LETTER THAL
+ '\u0631' # 0xA9 -> ARABIC LETTER REH
+ '\u0632' # 0xAA -> ARABIC LETTER ZAIN
+ '\u0633' # 0xAB -> ARABIC LETTER SEEN
+ '\u0634' # 0xAC -> ARABIC LETTER SHEEN
+ '\u0635' # 0xAD -> ARABIC LETTER SAD
+ '\xab' # 0xAE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0xAF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u2591' # 0xB0 -> LIGHT SHADE
+ '\u2592' # 0xB1 -> MEDIUM SHADE
+ '\u2593' # 0xB2 -> DARK SHADE
+ '\u2502' # 0xB3 -> BOX DRAWINGS LIGHT VERTICAL
+ '\u2524' # 0xB4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ '\u2561' # 0xB5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ '\u2562' # 0xB6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ '\u2556' # 0xB7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ '\u2555' # 0xB8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ '\u2563' # 0xB9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ '\u2551' # 0xBA -> BOX DRAWINGS DOUBLE VERTICAL
+ '\u2557' # 0xBB -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ '\u255d' # 0xBC -> BOX DRAWINGS DOUBLE UP AND LEFT
+ '\u255c' # 0xBD -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ '\u255b' # 0xBE -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ '\u2510' # 0xBF -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ '\u2514' # 0xC0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ '\u2534' # 0xC1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ '\u252c' # 0xC2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ '\u251c' # 0xC3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ '\u2500' # 0xC4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ '\u253c' # 0xC5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ '\u255e' # 0xC6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ '\u255f' # 0xC7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ '\u255a' # 0xC8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ '\u2554' # 0xC9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ '\u2569' # 0xCA -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ '\u2566' # 0xCB -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ '\u2560' # 0xCC -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ '\u2550' # 0xCD -> BOX DRAWINGS DOUBLE HORIZONTAL
+ '\u256c' # 0xCE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ '\u2567' # 0xCF -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ '\u2568' # 0xD0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ '\u2564' # 0xD1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ '\u2565' # 0xD2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ '\u2559' # 0xD3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ '\u2558' # 0xD4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ '\u2552' # 0xD5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ '\u2553' # 0xD6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ '\u256b' # 0xD7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ '\u256a' # 0xD8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ '\u2518' # 0xD9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ '\u250c' # 0xDA -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ '\u2588' # 0xDB -> FULL BLOCK
+ '\u2584' # 0xDC -> LOWER HALF BLOCK
+ '\u258c' # 0xDD -> LEFT HALF BLOCK
+ '\u2590' # 0xDE -> RIGHT HALF BLOCK
+ '\u2580' # 0xDF -> UPPER HALF BLOCK
+ '\u0636' # 0xE0 -> ARABIC LETTER DAD
+ '\u0637' # 0xE1 -> ARABIC LETTER TAH
+ '\u0638' # 0xE2 -> ARABIC LETTER ZAH
+ '\u0639' # 0xE3 -> ARABIC LETTER AIN
+ '\u063a' # 0xE4 -> ARABIC LETTER GHAIN
+ '\u0641' # 0xE5 -> ARABIC LETTER FEH
+ '\xb5' # 0xE6 -> MICRO SIGN
+ '\u0642' # 0xE7 -> ARABIC LETTER QAF
+ '\u0643' # 0xE8 -> ARABIC LETTER KAF
+ '\u0644' # 0xE9 -> ARABIC LETTER LAM
+ '\u0645' # 0xEA -> ARABIC LETTER MEEM
+ '\u0646' # 0xEB -> ARABIC LETTER NOON
+ '\u0647' # 0xEC -> ARABIC LETTER HEH
+ '\u0648' # 0xED -> ARABIC LETTER WAW
+ '\u0649' # 0xEE -> ARABIC LETTER ALEF MAKSURA
+ '\u064a' # 0xEF -> ARABIC LETTER YEH
+ '\u2261' # 0xF0 -> IDENTICAL TO
+ '\u064b' # 0xF1 -> ARABIC FATHATAN
+ '\u064c' # 0xF2 -> ARABIC DAMMATAN
+ '\u064d' # 0xF3 -> ARABIC KASRATAN
+ '\u064e' # 0xF4 -> ARABIC FATHA
+ '\u064f' # 0xF5 -> ARABIC DAMMA
+ '\u0650' # 0xF6 -> ARABIC KASRA
+ '\u2248' # 0xF7 -> ALMOST EQUAL TO
+ '\xb0' # 0xF8 -> DEGREE SIGN
+ '\u2219' # 0xF9 -> BULLET OPERATOR
+ '\xb7' # 0xFA -> MIDDLE DOT
+ '\u221a' # 0xFB -> SQUARE ROOT
+ '\u207f' # 0xFC -> SUPERSCRIPT LATIN SMALL LETTER N
+ '\xb2' # 0xFD -> SUPERSCRIPT TWO
+ '\u25a0' # 0xFE -> BLACK SQUARE
+ '\xa0' # 0xFF -> NO-BREAK SPACE
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp737.py b/dist/lib/encodings/cp737.py
new file mode 100644
index 0000000..9685bae
--- /dev/null
+++ b/dist/lib/encodings/cp737.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec cp737 generated from 'VENDORS/MICSFT/PC/CP737.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp737',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA
+ 0x0081: 0x0392, # GREEK CAPITAL LETTER BETA
+ 0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA
+ 0x0083: 0x0394, # GREEK CAPITAL LETTER DELTA
+ 0x0084: 0x0395, # GREEK CAPITAL LETTER EPSILON
+ 0x0085: 0x0396, # GREEK CAPITAL LETTER ZETA
+ 0x0086: 0x0397, # GREEK CAPITAL LETTER ETA
+ 0x0087: 0x0398, # GREEK CAPITAL LETTER THETA
+ 0x0088: 0x0399, # GREEK CAPITAL LETTER IOTA
+ 0x0089: 0x039a, # GREEK CAPITAL LETTER KAPPA
+ 0x008a: 0x039b, # GREEK CAPITAL LETTER LAMDA
+ 0x008b: 0x039c, # GREEK CAPITAL LETTER MU
+ 0x008c: 0x039d, # GREEK CAPITAL LETTER NU
+ 0x008d: 0x039e, # GREEK CAPITAL LETTER XI
+ 0x008e: 0x039f, # GREEK CAPITAL LETTER OMICRON
+ 0x008f: 0x03a0, # GREEK CAPITAL LETTER PI
+ 0x0090: 0x03a1, # GREEK CAPITAL LETTER RHO
+ 0x0091: 0x03a3, # GREEK CAPITAL LETTER SIGMA
+ 0x0092: 0x03a4, # GREEK CAPITAL LETTER TAU
+ 0x0093: 0x03a5, # GREEK CAPITAL LETTER UPSILON
+ 0x0094: 0x03a6, # GREEK CAPITAL LETTER PHI
+ 0x0095: 0x03a7, # GREEK CAPITAL LETTER CHI
+ 0x0096: 0x03a8, # GREEK CAPITAL LETTER PSI
+ 0x0097: 0x03a9, # GREEK CAPITAL LETTER OMEGA
+ 0x0098: 0x03b1, # GREEK SMALL LETTER ALPHA
+ 0x0099: 0x03b2, # GREEK SMALL LETTER BETA
+ 0x009a: 0x03b3, # GREEK SMALL LETTER GAMMA
+ 0x009b: 0x03b4, # GREEK SMALL LETTER DELTA
+ 0x009c: 0x03b5, # GREEK SMALL LETTER EPSILON
+ 0x009d: 0x03b6, # GREEK SMALL LETTER ZETA
+ 0x009e: 0x03b7, # GREEK SMALL LETTER ETA
+ 0x009f: 0x03b8, # GREEK SMALL LETTER THETA
+ 0x00a0: 0x03b9, # GREEK SMALL LETTER IOTA
+ 0x00a1: 0x03ba, # GREEK SMALL LETTER KAPPA
+ 0x00a2: 0x03bb, # GREEK SMALL LETTER LAMDA
+ 0x00a3: 0x03bc, # GREEK SMALL LETTER MU
+ 0x00a4: 0x03bd, # GREEK SMALL LETTER NU
+ 0x00a5: 0x03be, # GREEK SMALL LETTER XI
+ 0x00a6: 0x03bf, # GREEK SMALL LETTER OMICRON
+ 0x00a7: 0x03c0, # GREEK SMALL LETTER PI
+ 0x00a8: 0x03c1, # GREEK SMALL LETTER RHO
+ 0x00a9: 0x03c3, # GREEK SMALL LETTER SIGMA
+ 0x00aa: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA
+ 0x00ab: 0x03c4, # GREEK SMALL LETTER TAU
+ 0x00ac: 0x03c5, # GREEK SMALL LETTER UPSILON
+ 0x00ad: 0x03c6, # GREEK SMALL LETTER PHI
+ 0x00ae: 0x03c7, # GREEK SMALL LETTER CHI
+ 0x00af: 0x03c8, # GREEK SMALL LETTER PSI
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x258c, # LEFT HALF BLOCK
+ 0x00de: 0x2590, # RIGHT HALF BLOCK
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x03c9, # GREEK SMALL LETTER OMEGA
+ 0x00e1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS
+ 0x00e2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS
+ 0x00e3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS
+ 0x00e4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
+ 0x00e5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS
+ 0x00e6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
+ 0x00e7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
+ 0x00e8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+ 0x00e9: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
+ 0x00ea: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS
+ 0x00eb: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS
+ 0x00ec: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS
+ 0x00ed: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS
+ 0x00ee: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS
+ 0x00ef: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS
+ 0x00f0: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
+ 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
+ 0x00f4: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+ 0x00f5: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x2248, # ALMOST EQUAL TO
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x2219, # BULLET OPERATOR
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x221a, # SQUARE ROOT
+ 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x0000 -> NULL
+ '\x01' # 0x0001 -> START OF HEADING
+ '\x02' # 0x0002 -> START OF TEXT
+ '\x03' # 0x0003 -> END OF TEXT
+ '\x04' # 0x0004 -> END OF TRANSMISSION
+ '\x05' # 0x0005 -> ENQUIRY
+ '\x06' # 0x0006 -> ACKNOWLEDGE
+ '\x07' # 0x0007 -> BELL
+ '\x08' # 0x0008 -> BACKSPACE
+ '\t' # 0x0009 -> HORIZONTAL TABULATION
+ '\n' # 0x000a -> LINE FEED
+ '\x0b' # 0x000b -> VERTICAL TABULATION
+ '\x0c' # 0x000c -> FORM FEED
+ '\r' # 0x000d -> CARRIAGE RETURN
+ '\x0e' # 0x000e -> SHIFT OUT
+ '\x0f' # 0x000f -> SHIFT IN
+ '\x10' # 0x0010 -> DATA LINK ESCAPE
+ '\x11' # 0x0011 -> DEVICE CONTROL ONE
+ '\x12' # 0x0012 -> DEVICE CONTROL TWO
+ '\x13' # 0x0013 -> DEVICE CONTROL THREE
+ '\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ '\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ '\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x0018 -> CANCEL
+ '\x19' # 0x0019 -> END OF MEDIUM
+ '\x1a' # 0x001a -> SUBSTITUTE
+ '\x1b' # 0x001b -> ESCAPE
+ '\x1c' # 0x001c -> FILE SEPARATOR
+ '\x1d' # 0x001d -> GROUP SEPARATOR
+ '\x1e' # 0x001e -> RECORD SEPARATOR
+ '\x1f' # 0x001f -> UNIT SEPARATOR
+ ' ' # 0x0020 -> SPACE
+ '!' # 0x0021 -> EXCLAMATION MARK
+ '"' # 0x0022 -> QUOTATION MARK
+ '#' # 0x0023 -> NUMBER SIGN
+ '$' # 0x0024 -> DOLLAR SIGN
+ '%' # 0x0025 -> PERCENT SIGN
+ '&' # 0x0026 -> AMPERSAND
+ "'" # 0x0027 -> APOSTROPHE
+ '(' # 0x0028 -> LEFT PARENTHESIS
+ ')' # 0x0029 -> RIGHT PARENTHESIS
+ '*' # 0x002a -> ASTERISK
+ '+' # 0x002b -> PLUS SIGN
+ ',' # 0x002c -> COMMA
+ '-' # 0x002d -> HYPHEN-MINUS
+ '.' # 0x002e -> FULL STOP
+ '/' # 0x002f -> SOLIDUS
+ '0' # 0x0030 -> DIGIT ZERO
+ '1' # 0x0031 -> DIGIT ONE
+ '2' # 0x0032 -> DIGIT TWO
+ '3' # 0x0033 -> DIGIT THREE
+ '4' # 0x0034 -> DIGIT FOUR
+ '5' # 0x0035 -> DIGIT FIVE
+ '6' # 0x0036 -> DIGIT SIX
+ '7' # 0x0037 -> DIGIT SEVEN
+ '8' # 0x0038 -> DIGIT EIGHT
+ '9' # 0x0039 -> DIGIT NINE
+ ':' # 0x003a -> COLON
+ ';' # 0x003b -> SEMICOLON
+ '<' # 0x003c -> LESS-THAN SIGN
+ '=' # 0x003d -> EQUALS SIGN
+ '>' # 0x003e -> GREATER-THAN SIGN
+ '?' # 0x003f -> QUESTION MARK
+ '@' # 0x0040 -> COMMERCIAL AT
+ 'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ 'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ 'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ 'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ 'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ 'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ 'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ 'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ 'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ 'J' # 0x004a -> LATIN CAPITAL LETTER J
+ 'K' # 0x004b -> LATIN CAPITAL LETTER K
+ 'L' # 0x004c -> LATIN CAPITAL LETTER L
+ 'M' # 0x004d -> LATIN CAPITAL LETTER M
+ 'N' # 0x004e -> LATIN CAPITAL LETTER N
+ 'O' # 0x004f -> LATIN CAPITAL LETTER O
+ 'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ 'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ 'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ 'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ 'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ 'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ 'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ '[' # 0x005b -> LEFT SQUARE BRACKET
+ '\\' # 0x005c -> REVERSE SOLIDUS
+ ']' # 0x005d -> RIGHT SQUARE BRACKET
+ '^' # 0x005e -> CIRCUMFLEX ACCENT
+ '_' # 0x005f -> LOW LINE
+ '`' # 0x0060 -> GRAVE ACCENT
+ 'a' # 0x0061 -> LATIN SMALL LETTER A
+ 'b' # 0x0062 -> LATIN SMALL LETTER B
+ 'c' # 0x0063 -> LATIN SMALL LETTER C
+ 'd' # 0x0064 -> LATIN SMALL LETTER D
+ 'e' # 0x0065 -> LATIN SMALL LETTER E
+ 'f' # 0x0066 -> LATIN SMALL LETTER F
+ 'g' # 0x0067 -> LATIN SMALL LETTER G
+ 'h' # 0x0068 -> LATIN SMALL LETTER H
+ 'i' # 0x0069 -> LATIN SMALL LETTER I
+ 'j' # 0x006a -> LATIN SMALL LETTER J
+ 'k' # 0x006b -> LATIN SMALL LETTER K
+ 'l' # 0x006c -> LATIN SMALL LETTER L
+ 'm' # 0x006d -> LATIN SMALL LETTER M
+ 'n' # 0x006e -> LATIN SMALL LETTER N
+ 'o' # 0x006f -> LATIN SMALL LETTER O
+ 'p' # 0x0070 -> LATIN SMALL LETTER P
+ 'q' # 0x0071 -> LATIN SMALL LETTER Q
+ 'r' # 0x0072 -> LATIN SMALL LETTER R
+ 's' # 0x0073 -> LATIN SMALL LETTER S
+ 't' # 0x0074 -> LATIN SMALL LETTER T
+ 'u' # 0x0075 -> LATIN SMALL LETTER U
+ 'v' # 0x0076 -> LATIN SMALL LETTER V
+ 'w' # 0x0077 -> LATIN SMALL LETTER W
+ 'x' # 0x0078 -> LATIN SMALL LETTER X
+ 'y' # 0x0079 -> LATIN SMALL LETTER Y
+ 'z' # 0x007a -> LATIN SMALL LETTER Z
+ '{' # 0x007b -> LEFT CURLY BRACKET
+ '|' # 0x007c -> VERTICAL LINE
+ '}' # 0x007d -> RIGHT CURLY BRACKET
+ '~' # 0x007e -> TILDE
+ '\x7f' # 0x007f -> DELETE
+ '\u0391' # 0x0080 -> GREEK CAPITAL LETTER ALPHA
+ '\u0392' # 0x0081 -> GREEK CAPITAL LETTER BETA
+ '\u0393' # 0x0082 -> GREEK CAPITAL LETTER GAMMA
+ '\u0394' # 0x0083 -> GREEK CAPITAL LETTER DELTA
+ '\u0395' # 0x0084 -> GREEK CAPITAL LETTER EPSILON
+ '\u0396' # 0x0085 -> GREEK CAPITAL LETTER ZETA
+ '\u0397' # 0x0086 -> GREEK CAPITAL LETTER ETA
+ '\u0398' # 0x0087 -> GREEK CAPITAL LETTER THETA
+ '\u0399' # 0x0088 -> GREEK CAPITAL LETTER IOTA
+ '\u039a' # 0x0089 -> GREEK CAPITAL LETTER KAPPA
+ '\u039b' # 0x008a -> GREEK CAPITAL LETTER LAMDA
+ '\u039c' # 0x008b -> GREEK CAPITAL LETTER MU
+ '\u039d' # 0x008c -> GREEK CAPITAL LETTER NU
+ '\u039e' # 0x008d -> GREEK CAPITAL LETTER XI
+ '\u039f' # 0x008e -> GREEK CAPITAL LETTER OMICRON
+ '\u03a0' # 0x008f -> GREEK CAPITAL LETTER PI
+ '\u03a1' # 0x0090 -> GREEK CAPITAL LETTER RHO
+ '\u03a3' # 0x0091 -> GREEK CAPITAL LETTER SIGMA
+ '\u03a4' # 0x0092 -> GREEK CAPITAL LETTER TAU
+ '\u03a5' # 0x0093 -> GREEK CAPITAL LETTER UPSILON
+ '\u03a6' # 0x0094 -> GREEK CAPITAL LETTER PHI
+ '\u03a7' # 0x0095 -> GREEK CAPITAL LETTER CHI
+ '\u03a8' # 0x0096 -> GREEK CAPITAL LETTER PSI
+ '\u03a9' # 0x0097 -> GREEK CAPITAL LETTER OMEGA
+ '\u03b1' # 0x0098 -> GREEK SMALL LETTER ALPHA
+ '\u03b2' # 0x0099 -> GREEK SMALL LETTER BETA
+ '\u03b3' # 0x009a -> GREEK SMALL LETTER GAMMA
+ '\u03b4' # 0x009b -> GREEK SMALL LETTER DELTA
+ '\u03b5' # 0x009c -> GREEK SMALL LETTER EPSILON
+ '\u03b6' # 0x009d -> GREEK SMALL LETTER ZETA
+ '\u03b7' # 0x009e -> GREEK SMALL LETTER ETA
+ '\u03b8' # 0x009f -> GREEK SMALL LETTER THETA
+ '\u03b9' # 0x00a0 -> GREEK SMALL LETTER IOTA
+ '\u03ba' # 0x00a1 -> GREEK SMALL LETTER KAPPA
+ '\u03bb' # 0x00a2 -> GREEK SMALL LETTER LAMDA
+ '\u03bc' # 0x00a3 -> GREEK SMALL LETTER MU
+ '\u03bd' # 0x00a4 -> GREEK SMALL LETTER NU
+ '\u03be' # 0x00a5 -> GREEK SMALL LETTER XI
+ '\u03bf' # 0x00a6 -> GREEK SMALL LETTER OMICRON
+ '\u03c0' # 0x00a7 -> GREEK SMALL LETTER PI
+ '\u03c1' # 0x00a8 -> GREEK SMALL LETTER RHO
+ '\u03c3' # 0x00a9 -> GREEK SMALL LETTER SIGMA
+ '\u03c2' # 0x00aa -> GREEK SMALL LETTER FINAL SIGMA
+ '\u03c4' # 0x00ab -> GREEK SMALL LETTER TAU
+ '\u03c5' # 0x00ac -> GREEK SMALL LETTER UPSILON
+ '\u03c6' # 0x00ad -> GREEK SMALL LETTER PHI
+ '\u03c7' # 0x00ae -> GREEK SMALL LETTER CHI
+ '\u03c8' # 0x00af -> GREEK SMALL LETTER PSI
+ '\u2591' # 0x00b0 -> LIGHT SHADE
+ '\u2592' # 0x00b1 -> MEDIUM SHADE
+ '\u2593' # 0x00b2 -> DARK SHADE
+ '\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ '\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ '\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ '\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ '\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ '\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ '\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ '\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ '\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ '\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ '\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ '\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ '\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ '\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ '\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ '\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ '\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ '\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ '\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ '\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ '\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ '\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ '\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ '\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ '\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ '\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ '\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ '\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ '\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ '\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ '\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ '\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ '\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ '\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ '\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ '\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ '\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ '\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ '\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ '\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ '\u2588' # 0x00db -> FULL BLOCK
+ '\u2584' # 0x00dc -> LOWER HALF BLOCK
+ '\u258c' # 0x00dd -> LEFT HALF BLOCK
+ '\u2590' # 0x00de -> RIGHT HALF BLOCK
+ '\u2580' # 0x00df -> UPPER HALF BLOCK
+ '\u03c9' # 0x00e0 -> GREEK SMALL LETTER OMEGA
+ '\u03ac' # 0x00e1 -> GREEK SMALL LETTER ALPHA WITH TONOS
+ '\u03ad' # 0x00e2 -> GREEK SMALL LETTER EPSILON WITH TONOS
+ '\u03ae' # 0x00e3 -> GREEK SMALL LETTER ETA WITH TONOS
+ '\u03ca' # 0x00e4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
+ '\u03af' # 0x00e5 -> GREEK SMALL LETTER IOTA WITH TONOS
+ '\u03cc' # 0x00e6 -> GREEK SMALL LETTER OMICRON WITH TONOS
+ '\u03cd' # 0x00e7 -> GREEK SMALL LETTER UPSILON WITH TONOS
+ '\u03cb' # 0x00e8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+ '\u03ce' # 0x00e9 -> GREEK SMALL LETTER OMEGA WITH TONOS
+ '\u0386' # 0x00ea -> GREEK CAPITAL LETTER ALPHA WITH TONOS
+ '\u0388' # 0x00eb -> GREEK CAPITAL LETTER EPSILON WITH TONOS
+ '\u0389' # 0x00ec -> GREEK CAPITAL LETTER ETA WITH TONOS
+ '\u038a' # 0x00ed -> GREEK CAPITAL LETTER IOTA WITH TONOS
+ '\u038c' # 0x00ee -> GREEK CAPITAL LETTER OMICRON WITH TONOS
+ '\u038e' # 0x00ef -> GREEK CAPITAL LETTER UPSILON WITH TONOS
+ '\u038f' # 0x00f0 -> GREEK CAPITAL LETTER OMEGA WITH TONOS
+ '\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ '\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
+ '\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
+ '\u03aa' # 0x00f4 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+ '\u03ab' # 0x00f5 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+ '\xf7' # 0x00f6 -> DIVISION SIGN
+ '\u2248' # 0x00f7 -> ALMOST EQUAL TO
+ '\xb0' # 0x00f8 -> DEGREE SIGN
+ '\u2219' # 0x00f9 -> BULLET OPERATOR
+ '\xb7' # 0x00fa -> MIDDLE DOT
+ '\u221a' # 0x00fb -> SQUARE ROOT
+ '\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+ '\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ '\u25a0' # 0x00fe -> BLACK SQUARE
+ '\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x0386: 0x00ea, # GREEK CAPITAL LETTER ALPHA WITH TONOS
+ 0x0388: 0x00eb, # GREEK CAPITAL LETTER EPSILON WITH TONOS
+ 0x0389: 0x00ec, # GREEK CAPITAL LETTER ETA WITH TONOS
+ 0x038a: 0x00ed, # GREEK CAPITAL LETTER IOTA WITH TONOS
+ 0x038c: 0x00ee, # GREEK CAPITAL LETTER OMICRON WITH TONOS
+ 0x038e: 0x00ef, # GREEK CAPITAL LETTER UPSILON WITH TONOS
+ 0x038f: 0x00f0, # GREEK CAPITAL LETTER OMEGA WITH TONOS
+ 0x0391: 0x0080, # GREEK CAPITAL LETTER ALPHA
+ 0x0392: 0x0081, # GREEK CAPITAL LETTER BETA
+ 0x0393: 0x0082, # GREEK CAPITAL LETTER GAMMA
+ 0x0394: 0x0083, # GREEK CAPITAL LETTER DELTA
+ 0x0395: 0x0084, # GREEK CAPITAL LETTER EPSILON
+ 0x0396: 0x0085, # GREEK CAPITAL LETTER ZETA
+ 0x0397: 0x0086, # GREEK CAPITAL LETTER ETA
+ 0x0398: 0x0087, # GREEK CAPITAL LETTER THETA
+ 0x0399: 0x0088, # GREEK CAPITAL LETTER IOTA
+ 0x039a: 0x0089, # GREEK CAPITAL LETTER KAPPA
+ 0x039b: 0x008a, # GREEK CAPITAL LETTER LAMDA
+ 0x039c: 0x008b, # GREEK CAPITAL LETTER MU
+ 0x039d: 0x008c, # GREEK CAPITAL LETTER NU
+ 0x039e: 0x008d, # GREEK CAPITAL LETTER XI
+ 0x039f: 0x008e, # GREEK CAPITAL LETTER OMICRON
+ 0x03a0: 0x008f, # GREEK CAPITAL LETTER PI
+ 0x03a1: 0x0090, # GREEK CAPITAL LETTER RHO
+ 0x03a3: 0x0091, # GREEK CAPITAL LETTER SIGMA
+ 0x03a4: 0x0092, # GREEK CAPITAL LETTER TAU
+ 0x03a5: 0x0093, # GREEK CAPITAL LETTER UPSILON
+ 0x03a6: 0x0094, # GREEK CAPITAL LETTER PHI
+ 0x03a7: 0x0095, # GREEK CAPITAL LETTER CHI
+ 0x03a8: 0x0096, # GREEK CAPITAL LETTER PSI
+ 0x03a9: 0x0097, # GREEK CAPITAL LETTER OMEGA
+ 0x03aa: 0x00f4, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+ 0x03ab: 0x00f5, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+ 0x03ac: 0x00e1, # GREEK SMALL LETTER ALPHA WITH TONOS
+ 0x03ad: 0x00e2, # GREEK SMALL LETTER EPSILON WITH TONOS
+ 0x03ae: 0x00e3, # GREEK SMALL LETTER ETA WITH TONOS
+ 0x03af: 0x00e5, # GREEK SMALL LETTER IOTA WITH TONOS
+ 0x03b1: 0x0098, # GREEK SMALL LETTER ALPHA
+ 0x03b2: 0x0099, # GREEK SMALL LETTER BETA
+ 0x03b3: 0x009a, # GREEK SMALL LETTER GAMMA
+ 0x03b4: 0x009b, # GREEK SMALL LETTER DELTA
+ 0x03b5: 0x009c, # GREEK SMALL LETTER EPSILON
+ 0x03b6: 0x009d, # GREEK SMALL LETTER ZETA
+ 0x03b7: 0x009e, # GREEK SMALL LETTER ETA
+ 0x03b8: 0x009f, # GREEK SMALL LETTER THETA
+ 0x03b9: 0x00a0, # GREEK SMALL LETTER IOTA
+ 0x03ba: 0x00a1, # GREEK SMALL LETTER KAPPA
+ 0x03bb: 0x00a2, # GREEK SMALL LETTER LAMDA
+ 0x03bc: 0x00a3, # GREEK SMALL LETTER MU
+ 0x03bd: 0x00a4, # GREEK SMALL LETTER NU
+ 0x03be: 0x00a5, # GREEK SMALL LETTER XI
+ 0x03bf: 0x00a6, # GREEK SMALL LETTER OMICRON
+ 0x03c0: 0x00a7, # GREEK SMALL LETTER PI
+ 0x03c1: 0x00a8, # GREEK SMALL LETTER RHO
+ 0x03c2: 0x00aa, # GREEK SMALL LETTER FINAL SIGMA
+ 0x03c3: 0x00a9, # GREEK SMALL LETTER SIGMA
+ 0x03c4: 0x00ab, # GREEK SMALL LETTER TAU
+ 0x03c5: 0x00ac, # GREEK SMALL LETTER UPSILON
+ 0x03c6: 0x00ad, # GREEK SMALL LETTER PHI
+ 0x03c7: 0x00ae, # GREEK SMALL LETTER CHI
+ 0x03c8: 0x00af, # GREEK SMALL LETTER PSI
+ 0x03c9: 0x00e0, # GREEK SMALL LETTER OMEGA
+ 0x03ca: 0x00e4, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
+ 0x03cb: 0x00e8, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+ 0x03cc: 0x00e6, # GREEK SMALL LETTER OMICRON WITH TONOS
+ 0x03cd: 0x00e7, # GREEK SMALL LETTER UPSILON WITH TONOS
+ 0x03ce: 0x00e9, # GREEK SMALL LETTER OMEGA WITH TONOS
+ 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x2219: 0x00f9, # BULLET OPERATOR
+ 0x221a: 0x00fb, # SQUARE ROOT
+ 0x2248: 0x00f7, # ALMOST EQUAL TO
+ 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
+ 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x258c: 0x00dd, # LEFT HALF BLOCK
+ 0x2590: 0x00de, # RIGHT HALF BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/dist/lib/encodings/cp775.py b/dist/lib/encodings/cp775.py
new file mode 100644
index 0000000..fe06e7b
--- /dev/null
+++ b/dist/lib/encodings/cp775.py
@@ -0,0 +1,697 @@
+""" Python Character Mapping Codec cp775 generated from 'VENDORS/MICSFT/PC/CP775.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp775',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x0101, # LATIN SMALL LETTER A WITH MACRON
+ 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x0085: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA
+ 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x0087: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
+ 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE
+ 0x0089: 0x0113, # LATIN SMALL LETTER E WITH MACRON
+ 0x008a: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA
+ 0x008b: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA
+ 0x008c: 0x012b, # LATIN SMALL LETTER I WITH MACRON
+ 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
+ 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
+ 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
+ 0x0093: 0x014d, # LATIN SMALL LETTER O WITH MACRON
+ 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x0095: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA
+ 0x0096: 0x00a2, # CENT SIGN
+ 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
+ 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
+ 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x009e: 0x00d7, # MULTIPLICATION SIGN
+ 0x009f: 0x00a4, # CURRENCY SIGN
+ 0x00a0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON
+ 0x00a1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
+ 0x00a4: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
+ 0x00a5: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
+ 0x00a6: 0x201d, # RIGHT DOUBLE QUOTATION MARK
+ 0x00a7: 0x00a6, # BROKEN BAR
+ 0x00a8: 0x00a9, # COPYRIGHT SIGN
+ 0x00a9: 0x00ae, # REGISTERED SIGN
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
+ 0x00b6: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
+ 0x00b7: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
+ 0x00b8: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK
+ 0x00be: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK
+ 0x00c7: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
+ 0x00d0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
+ 0x00d1: 0x010d, # LATIN SMALL LETTER C WITH CARON
+ 0x00d2: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
+ 0x00d3: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE
+ 0x00d4: 0x012f, # LATIN SMALL LETTER I WITH OGONEK
+ 0x00d5: 0x0161, # LATIN SMALL LETTER S WITH CARON
+ 0x00d6: 0x0173, # LATIN SMALL LETTER U WITH OGONEK
+ 0x00d7: 0x016b, # LATIN SMALL LETTER U WITH MACRON
+ 0x00d8: 0x017e, # LATIN SMALL LETTER Z WITH CARON
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x258c, # LEFT HALF BLOCK
+ 0x00de: 0x2590, # RIGHT HALF BLOCK
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN)
+ 0x00e2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON
+ 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
+ 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
+ 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
+ 0x00e8: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA
+ 0x00e9: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA
+ 0x00ea: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA
+ 0x00eb: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA
+ 0x00ec: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA
+ 0x00ed: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON
+ 0x00ee: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA
+ 0x00ef: 0x2019, # RIGHT SINGLE QUOTATION MARK
+ 0x00f0: 0x00ad, # SOFT HYPHEN
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x201c, # LEFT DOUBLE QUOTATION MARK
+ 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS
+ 0x00f4: 0x00b6, # PILCROW SIGN
+ 0x00f5: 0x00a7, # SECTION SIGN
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x2219, # BULLET OPERATOR
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x00b9, # SUPERSCRIPT ONE
+ 0x00fc: 0x00b3, # SUPERSCRIPT THREE
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x0000 -> NULL
+ '\x01' # 0x0001 -> START OF HEADING
+ '\x02' # 0x0002 -> START OF TEXT
+ '\x03' # 0x0003 -> END OF TEXT
+ '\x04' # 0x0004 -> END OF TRANSMISSION
+ '\x05' # 0x0005 -> ENQUIRY
+ '\x06' # 0x0006 -> ACKNOWLEDGE
+ '\x07' # 0x0007 -> BELL
+ '\x08' # 0x0008 -> BACKSPACE
+ '\t' # 0x0009 -> HORIZONTAL TABULATION
+ '\n' # 0x000a -> LINE FEED
+ '\x0b' # 0x000b -> VERTICAL TABULATION
+ '\x0c' # 0x000c -> FORM FEED
+ '\r' # 0x000d -> CARRIAGE RETURN
+ '\x0e' # 0x000e -> SHIFT OUT
+ '\x0f' # 0x000f -> SHIFT IN
+ '\x10' # 0x0010 -> DATA LINK ESCAPE
+ '\x11' # 0x0011 -> DEVICE CONTROL ONE
+ '\x12' # 0x0012 -> DEVICE CONTROL TWO
+ '\x13' # 0x0013 -> DEVICE CONTROL THREE
+ '\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ '\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ '\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x0018 -> CANCEL
+ '\x19' # 0x0019 -> END OF MEDIUM
+ '\x1a' # 0x001a -> SUBSTITUTE
+ '\x1b' # 0x001b -> ESCAPE
+ '\x1c' # 0x001c -> FILE SEPARATOR
+ '\x1d' # 0x001d -> GROUP SEPARATOR
+ '\x1e' # 0x001e -> RECORD SEPARATOR
+ '\x1f' # 0x001f -> UNIT SEPARATOR
+ ' ' # 0x0020 -> SPACE
+ '!' # 0x0021 -> EXCLAMATION MARK
+ '"' # 0x0022 -> QUOTATION MARK
+ '#' # 0x0023 -> NUMBER SIGN
+ '$' # 0x0024 -> DOLLAR SIGN
+ '%' # 0x0025 -> PERCENT SIGN
+ '&' # 0x0026 -> AMPERSAND
+ "'" # 0x0027 -> APOSTROPHE
+ '(' # 0x0028 -> LEFT PARENTHESIS
+ ')' # 0x0029 -> RIGHT PARENTHESIS
+ '*' # 0x002a -> ASTERISK
+ '+' # 0x002b -> PLUS SIGN
+ ',' # 0x002c -> COMMA
+ '-' # 0x002d -> HYPHEN-MINUS
+ '.' # 0x002e -> FULL STOP
+ '/' # 0x002f -> SOLIDUS
+ '0' # 0x0030 -> DIGIT ZERO
+ '1' # 0x0031 -> DIGIT ONE
+ '2' # 0x0032 -> DIGIT TWO
+ '3' # 0x0033 -> DIGIT THREE
+ '4' # 0x0034 -> DIGIT FOUR
+ '5' # 0x0035 -> DIGIT FIVE
+ '6' # 0x0036 -> DIGIT SIX
+ '7' # 0x0037 -> DIGIT SEVEN
+ '8' # 0x0038 -> DIGIT EIGHT
+ '9' # 0x0039 -> DIGIT NINE
+ ':' # 0x003a -> COLON
+ ';' # 0x003b -> SEMICOLON
+ '<' # 0x003c -> LESS-THAN SIGN
+ '=' # 0x003d -> EQUALS SIGN
+ '>' # 0x003e -> GREATER-THAN SIGN
+ '?' # 0x003f -> QUESTION MARK
+ '@' # 0x0040 -> COMMERCIAL AT
+ 'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ 'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ 'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ 'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ 'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ 'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ 'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ 'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ 'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ 'J' # 0x004a -> LATIN CAPITAL LETTER J
+ 'K' # 0x004b -> LATIN CAPITAL LETTER K
+ 'L' # 0x004c -> LATIN CAPITAL LETTER L
+ 'M' # 0x004d -> LATIN CAPITAL LETTER M
+ 'N' # 0x004e -> LATIN CAPITAL LETTER N
+ 'O' # 0x004f -> LATIN CAPITAL LETTER O
+ 'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ 'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ 'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ 'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ 'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ 'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ 'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ '[' # 0x005b -> LEFT SQUARE BRACKET
+ '\\' # 0x005c -> REVERSE SOLIDUS
+ ']' # 0x005d -> RIGHT SQUARE BRACKET
+ '^' # 0x005e -> CIRCUMFLEX ACCENT
+ '_' # 0x005f -> LOW LINE
+ '`' # 0x0060 -> GRAVE ACCENT
+ 'a' # 0x0061 -> LATIN SMALL LETTER A
+ 'b' # 0x0062 -> LATIN SMALL LETTER B
+ 'c' # 0x0063 -> LATIN SMALL LETTER C
+ 'd' # 0x0064 -> LATIN SMALL LETTER D
+ 'e' # 0x0065 -> LATIN SMALL LETTER E
+ 'f' # 0x0066 -> LATIN SMALL LETTER F
+ 'g' # 0x0067 -> LATIN SMALL LETTER G
+ 'h' # 0x0068 -> LATIN SMALL LETTER H
+ 'i' # 0x0069 -> LATIN SMALL LETTER I
+ 'j' # 0x006a -> LATIN SMALL LETTER J
+ 'k' # 0x006b -> LATIN SMALL LETTER K
+ 'l' # 0x006c -> LATIN SMALL LETTER L
+ 'm' # 0x006d -> LATIN SMALL LETTER M
+ 'n' # 0x006e -> LATIN SMALL LETTER N
+ 'o' # 0x006f -> LATIN SMALL LETTER O
+ 'p' # 0x0070 -> LATIN SMALL LETTER P
+ 'q' # 0x0071 -> LATIN SMALL LETTER Q
+ 'r' # 0x0072 -> LATIN SMALL LETTER R
+ 's' # 0x0073 -> LATIN SMALL LETTER S
+ 't' # 0x0074 -> LATIN SMALL LETTER T
+ 'u' # 0x0075 -> LATIN SMALL LETTER U
+ 'v' # 0x0076 -> LATIN SMALL LETTER V
+ 'w' # 0x0077 -> LATIN SMALL LETTER W
+ 'x' # 0x0078 -> LATIN SMALL LETTER X
+ 'y' # 0x0079 -> LATIN SMALL LETTER Y
+ 'z' # 0x007a -> LATIN SMALL LETTER Z
+ '{' # 0x007b -> LEFT CURLY BRACKET
+ '|' # 0x007c -> VERTICAL LINE
+ '}' # 0x007d -> RIGHT CURLY BRACKET
+ '~' # 0x007e -> TILDE
+ '\x7f' # 0x007f -> DELETE
+ '\u0106' # 0x0080 -> LATIN CAPITAL LETTER C WITH ACUTE
+ '\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ '\u0101' # 0x0083 -> LATIN SMALL LETTER A WITH MACRON
+ '\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\u0123' # 0x0085 -> LATIN SMALL LETTER G WITH CEDILLA
+ '\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '\u0107' # 0x0087 -> LATIN SMALL LETTER C WITH ACUTE
+ '\u0142' # 0x0088 -> LATIN SMALL LETTER L WITH STROKE
+ '\u0113' # 0x0089 -> LATIN SMALL LETTER E WITH MACRON
+ '\u0156' # 0x008a -> LATIN CAPITAL LETTER R WITH CEDILLA
+ '\u0157' # 0x008b -> LATIN SMALL LETTER R WITH CEDILLA
+ '\u012b' # 0x008c -> LATIN SMALL LETTER I WITH MACRON
+ '\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE
+ '\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
+ '\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
+ '\u014d' # 0x0093 -> LATIN SMALL LETTER O WITH MACRON
+ '\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\u0122' # 0x0095 -> LATIN CAPITAL LETTER G WITH CEDILLA
+ '\xa2' # 0x0096 -> CENT SIGN
+ '\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE
+ '\u015b' # 0x0098 -> LATIN SMALL LETTER S WITH ACUTE
+ '\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE
+ '\xa3' # 0x009c -> POUND SIGN
+ '\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE
+ '\xd7' # 0x009e -> MULTIPLICATION SIGN
+ '\xa4' # 0x009f -> CURRENCY SIGN
+ '\u0100' # 0x00a0 -> LATIN CAPITAL LETTER A WITH MACRON
+ '\u012a' # 0x00a1 -> LATIN CAPITAL LETTER I WITH MACRON
+ '\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ '\u017b' # 0x00a3 -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
+ '\u017c' # 0x00a4 -> LATIN SMALL LETTER Z WITH DOT ABOVE
+ '\u017a' # 0x00a5 -> LATIN SMALL LETTER Z WITH ACUTE
+ '\u201d' # 0x00a6 -> RIGHT DOUBLE QUOTATION MARK
+ '\xa6' # 0x00a7 -> BROKEN BAR
+ '\xa9' # 0x00a8 -> COPYRIGHT SIGN
+ '\xae' # 0x00a9 -> REGISTERED SIGN
+ '\xac' # 0x00aa -> NOT SIGN
+ '\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ '\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ '\u0141' # 0x00ad -> LATIN CAPITAL LETTER L WITH STROKE
+ '\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u2591' # 0x00b0 -> LIGHT SHADE
+ '\u2592' # 0x00b1 -> MEDIUM SHADE
+ '\u2593' # 0x00b2 -> DARK SHADE
+ '\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ '\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ '\u0104' # 0x00b5 -> LATIN CAPITAL LETTER A WITH OGONEK
+ '\u010c' # 0x00b6 -> LATIN CAPITAL LETTER C WITH CARON
+ '\u0118' # 0x00b7 -> LATIN CAPITAL LETTER E WITH OGONEK
+ '\u0116' # 0x00b8 -> LATIN CAPITAL LETTER E WITH DOT ABOVE
+ '\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ '\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ '\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ '\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ '\u012e' # 0x00bd -> LATIN CAPITAL LETTER I WITH OGONEK
+ '\u0160' # 0x00be -> LATIN CAPITAL LETTER S WITH CARON
+ '\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ '\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ '\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ '\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ '\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ '\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ '\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ '\u0172' # 0x00c6 -> LATIN CAPITAL LETTER U WITH OGONEK
+ '\u016a' # 0x00c7 -> LATIN CAPITAL LETTER U WITH MACRON
+ '\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ '\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ '\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ '\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ '\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ '\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ '\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ '\u017d' # 0x00cf -> LATIN CAPITAL LETTER Z WITH CARON
+ '\u0105' # 0x00d0 -> LATIN SMALL LETTER A WITH OGONEK
+ '\u010d' # 0x00d1 -> LATIN SMALL LETTER C WITH CARON
+ '\u0119' # 0x00d2 -> LATIN SMALL LETTER E WITH OGONEK
+ '\u0117' # 0x00d3 -> LATIN SMALL LETTER E WITH DOT ABOVE
+ '\u012f' # 0x00d4 -> LATIN SMALL LETTER I WITH OGONEK
+ '\u0161' # 0x00d5 -> LATIN SMALL LETTER S WITH CARON
+ '\u0173' # 0x00d6 -> LATIN SMALL LETTER U WITH OGONEK
+ '\u016b' # 0x00d7 -> LATIN SMALL LETTER U WITH MACRON
+ '\u017e' # 0x00d8 -> LATIN SMALL LETTER Z WITH CARON
+ '\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ '\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ '\u2588' # 0x00db -> FULL BLOCK
+ '\u2584' # 0x00dc -> LOWER HALF BLOCK
+ '\u258c' # 0x00dd -> LEFT HALF BLOCK
+ '\u2590' # 0x00de -> RIGHT HALF BLOCK
+ '\u2580' # 0x00df -> UPPER HALF BLOCK
+ '\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN)
+ '\u014c' # 0x00e2 -> LATIN CAPITAL LETTER O WITH MACRON
+ '\u0143' # 0x00e3 -> LATIN CAPITAL LETTER N WITH ACUTE
+ '\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE
+ '\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE
+ '\xb5' # 0x00e6 -> MICRO SIGN
+ '\u0144' # 0x00e7 -> LATIN SMALL LETTER N WITH ACUTE
+ '\u0136' # 0x00e8 -> LATIN CAPITAL LETTER K WITH CEDILLA
+ '\u0137' # 0x00e9 -> LATIN SMALL LETTER K WITH CEDILLA
+ '\u013b' # 0x00ea -> LATIN CAPITAL LETTER L WITH CEDILLA
+ '\u013c' # 0x00eb -> LATIN SMALL LETTER L WITH CEDILLA
+ '\u0146' # 0x00ec -> LATIN SMALL LETTER N WITH CEDILLA
+ '\u0112' # 0x00ed -> LATIN CAPITAL LETTER E WITH MACRON
+ '\u0145' # 0x00ee -> LATIN CAPITAL LETTER N WITH CEDILLA
+ '\u2019' # 0x00ef -> RIGHT SINGLE QUOTATION MARK
+ '\xad' # 0x00f0 -> SOFT HYPHEN
+ '\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ '\u201c' # 0x00f2 -> LEFT DOUBLE QUOTATION MARK
+ '\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS
+ '\xb6' # 0x00f4 -> PILCROW SIGN
+ '\xa7' # 0x00f5 -> SECTION SIGN
+ '\xf7' # 0x00f6 -> DIVISION SIGN
+ '\u201e' # 0x00f7 -> DOUBLE LOW-9 QUOTATION MARK
+ '\xb0' # 0x00f8 -> DEGREE SIGN
+ '\u2219' # 0x00f9 -> BULLET OPERATOR
+ '\xb7' # 0x00fa -> MIDDLE DOT
+ '\xb9' # 0x00fb -> SUPERSCRIPT ONE
+ '\xb3' # 0x00fc -> SUPERSCRIPT THREE
+ '\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ '\u25a0' # 0x00fe -> BLACK SQUARE
+ '\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a2: 0x0096, # CENT SIGN
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00a4: 0x009f, # CURRENCY SIGN
+ 0x00a6: 0x00a7, # BROKEN BAR
+ 0x00a7: 0x00f5, # SECTION SIGN
+ 0x00a9: 0x00a8, # COPYRIGHT SIGN
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00ad: 0x00f0, # SOFT HYPHEN
+ 0x00ae: 0x00a9, # REGISTERED SIGN
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b3: 0x00fc, # SUPERSCRIPT THREE
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b6: 0x00f4, # PILCROW SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00b9: 0x00fb, # SUPERSCRIPT ONE
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS
+ 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x00d7: 0x009e, # MULTIPLICATION SIGN
+ 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN)
+ 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE
+ 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0100: 0x00a0, # LATIN CAPITAL LETTER A WITH MACRON
+ 0x0101: 0x0083, # LATIN SMALL LETTER A WITH MACRON
+ 0x0104: 0x00b5, # LATIN CAPITAL LETTER A WITH OGONEK
+ 0x0105: 0x00d0, # LATIN SMALL LETTER A WITH OGONEK
+ 0x0106: 0x0080, # LATIN CAPITAL LETTER C WITH ACUTE
+ 0x0107: 0x0087, # LATIN SMALL LETTER C WITH ACUTE
+ 0x010c: 0x00b6, # LATIN CAPITAL LETTER C WITH CARON
+ 0x010d: 0x00d1, # LATIN SMALL LETTER C WITH CARON
+ 0x0112: 0x00ed, # LATIN CAPITAL LETTER E WITH MACRON
+ 0x0113: 0x0089, # LATIN SMALL LETTER E WITH MACRON
+ 0x0116: 0x00b8, # LATIN CAPITAL LETTER E WITH DOT ABOVE
+ 0x0117: 0x00d3, # LATIN SMALL LETTER E WITH DOT ABOVE
+ 0x0118: 0x00b7, # LATIN CAPITAL LETTER E WITH OGONEK
+ 0x0119: 0x00d2, # LATIN SMALL LETTER E WITH OGONEK
+ 0x0122: 0x0095, # LATIN CAPITAL LETTER G WITH CEDILLA
+ 0x0123: 0x0085, # LATIN SMALL LETTER G WITH CEDILLA
+ 0x012a: 0x00a1, # LATIN CAPITAL LETTER I WITH MACRON
+ 0x012b: 0x008c, # LATIN SMALL LETTER I WITH MACRON
+ 0x012e: 0x00bd, # LATIN CAPITAL LETTER I WITH OGONEK
+ 0x012f: 0x00d4, # LATIN SMALL LETTER I WITH OGONEK
+ 0x0136: 0x00e8, # LATIN CAPITAL LETTER K WITH CEDILLA
+ 0x0137: 0x00e9, # LATIN SMALL LETTER K WITH CEDILLA
+ 0x013b: 0x00ea, # LATIN CAPITAL LETTER L WITH CEDILLA
+ 0x013c: 0x00eb, # LATIN SMALL LETTER L WITH CEDILLA
+ 0x0141: 0x00ad, # LATIN CAPITAL LETTER L WITH STROKE
+ 0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE
+ 0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE
+ 0x0144: 0x00e7, # LATIN SMALL LETTER N WITH ACUTE
+ 0x0145: 0x00ee, # LATIN CAPITAL LETTER N WITH CEDILLA
+ 0x0146: 0x00ec, # LATIN SMALL LETTER N WITH CEDILLA
+ 0x014c: 0x00e2, # LATIN CAPITAL LETTER O WITH MACRON
+ 0x014d: 0x0093, # LATIN SMALL LETTER O WITH MACRON
+ 0x0156: 0x008a, # LATIN CAPITAL LETTER R WITH CEDILLA
+ 0x0157: 0x008b, # LATIN SMALL LETTER R WITH CEDILLA
+ 0x015a: 0x0097, # LATIN CAPITAL LETTER S WITH ACUTE
+ 0x015b: 0x0098, # LATIN SMALL LETTER S WITH ACUTE
+ 0x0160: 0x00be, # LATIN CAPITAL LETTER S WITH CARON
+ 0x0161: 0x00d5, # LATIN SMALL LETTER S WITH CARON
+ 0x016a: 0x00c7, # LATIN CAPITAL LETTER U WITH MACRON
+ 0x016b: 0x00d7, # LATIN SMALL LETTER U WITH MACRON
+ 0x0172: 0x00c6, # LATIN CAPITAL LETTER U WITH OGONEK
+ 0x0173: 0x00d6, # LATIN SMALL LETTER U WITH OGONEK
+ 0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE
+ 0x017a: 0x00a5, # LATIN SMALL LETTER Z WITH ACUTE
+ 0x017b: 0x00a3, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
+ 0x017c: 0x00a4, # LATIN SMALL LETTER Z WITH DOT ABOVE
+ 0x017d: 0x00cf, # LATIN CAPITAL LETTER Z WITH CARON
+ 0x017e: 0x00d8, # LATIN SMALL LETTER Z WITH CARON
+ 0x2019: 0x00ef, # RIGHT SINGLE QUOTATION MARK
+ 0x201c: 0x00f2, # LEFT DOUBLE QUOTATION MARK
+ 0x201d: 0x00a6, # RIGHT DOUBLE QUOTATION MARK
+ 0x201e: 0x00f7, # DOUBLE LOW-9 QUOTATION MARK
+ 0x2219: 0x00f9, # BULLET OPERATOR
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x258c: 0x00dd, # LEFT HALF BLOCK
+ 0x2590: 0x00de, # RIGHT HALF BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/dist/lib/encodings/cp850.py b/dist/lib/encodings/cp850.py
new file mode 100644
index 0000000..f98aef9
--- /dev/null
+++ b/dist/lib/encodings/cp850.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP850.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp850',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
+ 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
+ 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
+ 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
+ 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
+ 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
+ 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
+ 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x009e: 0x00d7, # MULTIPLICATION SIGN
+ 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
+ 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
+ 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
+ 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
+ 0x00a8: 0x00bf, # INVERTED QUESTION MARK
+ 0x00a9: 0x00ae, # REGISTERED SIGN
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x00b8: 0x00a9, # COPYRIGHT SIGN
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x00a2, # CENT SIGN
+ 0x00be: 0x00a5, # YEN SIGN
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
+ 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x00a4, # CURRENCY SIGN
+ 0x00d0: 0x00f0, # LATIN SMALL LETTER ETH
+ 0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH
+ 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x00d5: 0x0131, # LATIN SMALL LETTER DOTLESS I
+ 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x00a6, # BROKEN BAR
+ 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
+ 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
+ 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
+ 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: 0x00fe, # LATIN SMALL LETTER THORN
+ 0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN
+ 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
+ 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
+ 0x00ee: 0x00af, # MACRON
+ 0x00ef: 0x00b4, # ACUTE ACCENT
+ 0x00f0: 0x00ad, # SOFT HYPHEN
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x2017, # DOUBLE LOW LINE
+ 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS
+ 0x00f4: 0x00b6, # PILCROW SIGN
+ 0x00f5: 0x00a7, # SECTION SIGN
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x00b8, # CEDILLA
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x00a8, # DIAERESIS
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x00b9, # SUPERSCRIPT ONE
+ 0x00fc: 0x00b3, # SUPERSCRIPT THREE
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x0000 -> NULL
+ '\x01' # 0x0001 -> START OF HEADING
+ '\x02' # 0x0002 -> START OF TEXT
+ '\x03' # 0x0003 -> END OF TEXT
+ '\x04' # 0x0004 -> END OF TRANSMISSION
+ '\x05' # 0x0005 -> ENQUIRY
+ '\x06' # 0x0006 -> ACKNOWLEDGE
+ '\x07' # 0x0007 -> BELL
+ '\x08' # 0x0008 -> BACKSPACE
+ '\t' # 0x0009 -> HORIZONTAL TABULATION
+ '\n' # 0x000a -> LINE FEED
+ '\x0b' # 0x000b -> VERTICAL TABULATION
+ '\x0c' # 0x000c -> FORM FEED
+ '\r' # 0x000d -> CARRIAGE RETURN
+ '\x0e' # 0x000e -> SHIFT OUT
+ '\x0f' # 0x000f -> SHIFT IN
+ '\x10' # 0x0010 -> DATA LINK ESCAPE
+ '\x11' # 0x0011 -> DEVICE CONTROL ONE
+ '\x12' # 0x0012 -> DEVICE CONTROL TWO
+ '\x13' # 0x0013 -> DEVICE CONTROL THREE
+ '\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ '\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ '\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x0018 -> CANCEL
+ '\x19' # 0x0019 -> END OF MEDIUM
+ '\x1a' # 0x001a -> SUBSTITUTE
+ '\x1b' # 0x001b -> ESCAPE
+ '\x1c' # 0x001c -> FILE SEPARATOR
+ '\x1d' # 0x001d -> GROUP SEPARATOR
+ '\x1e' # 0x001e -> RECORD SEPARATOR
+ '\x1f' # 0x001f -> UNIT SEPARATOR
+ ' ' # 0x0020 -> SPACE
+ '!' # 0x0021 -> EXCLAMATION MARK
+ '"' # 0x0022 -> QUOTATION MARK
+ '#' # 0x0023 -> NUMBER SIGN
+ '$' # 0x0024 -> DOLLAR SIGN
+ '%' # 0x0025 -> PERCENT SIGN
+ '&' # 0x0026 -> AMPERSAND
+ "'" # 0x0027 -> APOSTROPHE
+ '(' # 0x0028 -> LEFT PARENTHESIS
+ ')' # 0x0029 -> RIGHT PARENTHESIS
+ '*' # 0x002a -> ASTERISK
+ '+' # 0x002b -> PLUS SIGN
+ ',' # 0x002c -> COMMA
+ '-' # 0x002d -> HYPHEN-MINUS
+ '.' # 0x002e -> FULL STOP
+ '/' # 0x002f -> SOLIDUS
+ '0' # 0x0030 -> DIGIT ZERO
+ '1' # 0x0031 -> DIGIT ONE
+ '2' # 0x0032 -> DIGIT TWO
+ '3' # 0x0033 -> DIGIT THREE
+ '4' # 0x0034 -> DIGIT FOUR
+ '5' # 0x0035 -> DIGIT FIVE
+ '6' # 0x0036 -> DIGIT SIX
+ '7' # 0x0037 -> DIGIT SEVEN
+ '8' # 0x0038 -> DIGIT EIGHT
+ '9' # 0x0039 -> DIGIT NINE
+ ':' # 0x003a -> COLON
+ ';' # 0x003b -> SEMICOLON
+ '<' # 0x003c -> LESS-THAN SIGN
+ '=' # 0x003d -> EQUALS SIGN
+ '>' # 0x003e -> GREATER-THAN SIGN
+ '?' # 0x003f -> QUESTION MARK
+ '@' # 0x0040 -> COMMERCIAL AT
+ 'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ 'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ 'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ 'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ 'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ 'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ 'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ 'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ 'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ 'J' # 0x004a -> LATIN CAPITAL LETTER J
+ 'K' # 0x004b -> LATIN CAPITAL LETTER K
+ 'L' # 0x004c -> LATIN CAPITAL LETTER L
+ 'M' # 0x004d -> LATIN CAPITAL LETTER M
+ 'N' # 0x004e -> LATIN CAPITAL LETTER N
+ 'O' # 0x004f -> LATIN CAPITAL LETTER O
+ 'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ 'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ 'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ 'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ 'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ 'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ 'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ '[' # 0x005b -> LEFT SQUARE BRACKET
+ '\\' # 0x005c -> REVERSE SOLIDUS
+ ']' # 0x005d -> RIGHT SQUARE BRACKET
+ '^' # 0x005e -> CIRCUMFLEX ACCENT
+ '_' # 0x005f -> LOW LINE
+ '`' # 0x0060 -> GRAVE ACCENT
+ 'a' # 0x0061 -> LATIN SMALL LETTER A
+ 'b' # 0x0062 -> LATIN SMALL LETTER B
+ 'c' # 0x0063 -> LATIN SMALL LETTER C
+ 'd' # 0x0064 -> LATIN SMALL LETTER D
+ 'e' # 0x0065 -> LATIN SMALL LETTER E
+ 'f' # 0x0066 -> LATIN SMALL LETTER F
+ 'g' # 0x0067 -> LATIN SMALL LETTER G
+ 'h' # 0x0068 -> LATIN SMALL LETTER H
+ 'i' # 0x0069 -> LATIN SMALL LETTER I
+ 'j' # 0x006a -> LATIN SMALL LETTER J
+ 'k' # 0x006b -> LATIN SMALL LETTER K
+ 'l' # 0x006c -> LATIN SMALL LETTER L
+ 'm' # 0x006d -> LATIN SMALL LETTER M
+ 'n' # 0x006e -> LATIN SMALL LETTER N
+ 'o' # 0x006f -> LATIN SMALL LETTER O
+ 'p' # 0x0070 -> LATIN SMALL LETTER P
+ 'q' # 0x0071 -> LATIN SMALL LETTER Q
+ 'r' # 0x0072 -> LATIN SMALL LETTER R
+ 's' # 0x0073 -> LATIN SMALL LETTER S
+ 't' # 0x0074 -> LATIN SMALL LETTER T
+ 'u' # 0x0075 -> LATIN SMALL LETTER U
+ 'v' # 0x0076 -> LATIN SMALL LETTER V
+ 'w' # 0x0077 -> LATIN SMALL LETTER W
+ 'x' # 0x0078 -> LATIN SMALL LETTER X
+ 'y' # 0x0079 -> LATIN SMALL LETTER Y
+ 'z' # 0x007a -> LATIN SMALL LETTER Z
+ '{' # 0x007b -> LEFT CURLY BRACKET
+ '|' # 0x007c -> VERTICAL LINE
+ '}' # 0x007d -> RIGHT CURLY BRACKET
+ '~' # 0x007e -> TILDE
+ '\x7f' # 0x007f -> DELETE
+ '\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
+ '\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE
+ '\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
+ '\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
+ '\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE
+ '\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+ '\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS
+ '\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE
+ '\xa3' # 0x009c -> POUND SIGN
+ '\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE
+ '\xd7' # 0x009e -> MULTIPLICATION SIGN
+ '\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
+ '\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+ '\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ '\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ '\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+ '\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+ '\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR
+ '\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR
+ '\xbf' # 0x00a8 -> INVERTED QUESTION MARK
+ '\xae' # 0x00a9 -> REGISTERED SIGN
+ '\xac' # 0x00aa -> NOT SIGN
+ '\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ '\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ '\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
+ '\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u2591' # 0x00b0 -> LIGHT SHADE
+ '\u2592' # 0x00b1 -> MEDIUM SHADE
+ '\u2593' # 0x00b2 -> DARK SHADE
+ '\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ '\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ '\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ '\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE
+ '\xa9' # 0x00b8 -> COPYRIGHT SIGN
+ '\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ '\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ '\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ '\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ '\xa2' # 0x00bd -> CENT SIGN
+ '\xa5' # 0x00be -> YEN SIGN
+ '\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ '\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ '\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ '\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ '\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ '\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ '\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ '\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE
+ '\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE
+ '\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ '\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ '\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ '\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ '\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ '\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ '\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ '\xa4' # 0x00cf -> CURRENCY SIGN
+ '\xf0' # 0x00d0 -> LATIN SMALL LETTER ETH
+ '\xd0' # 0x00d1 -> LATIN CAPITAL LETTER ETH
+ '\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ '\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ '\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE
+ '\u0131' # 0x00d5 -> LATIN SMALL LETTER DOTLESS I
+ '\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ '\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ '\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ '\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ '\u2588' # 0x00db -> FULL BLOCK
+ '\u2584' # 0x00dc -> LOWER HALF BLOCK
+ '\xa6' # 0x00dd -> BROKEN BAR
+ '\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE
+ '\u2580' # 0x00df -> UPPER HALF BLOCK
+ '\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
+ '\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE
+ '\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE
+ '\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE
+ '\xb5' # 0x00e6 -> MICRO SIGN
+ '\xfe' # 0x00e7 -> LATIN SMALL LETTER THORN
+ '\xde' # 0x00e8 -> LATIN CAPITAL LETTER THORN
+ '\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ '\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE
+ '\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE
+ '\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE
+ '\xaf' # 0x00ee -> MACRON
+ '\xb4' # 0x00ef -> ACUTE ACCENT
+ '\xad' # 0x00f0 -> SOFT HYPHEN
+ '\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ '\u2017' # 0x00f2 -> DOUBLE LOW LINE
+ '\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS
+ '\xb6' # 0x00f4 -> PILCROW SIGN
+ '\xa7' # 0x00f5 -> SECTION SIGN
+ '\xf7' # 0x00f6 -> DIVISION SIGN
+ '\xb8' # 0x00f7 -> CEDILLA
+ '\xb0' # 0x00f8 -> DEGREE SIGN
+ '\xa8' # 0x00f9 -> DIAERESIS
+ '\xb7' # 0x00fa -> MIDDLE DOT
+ '\xb9' # 0x00fb -> SUPERSCRIPT ONE
+ '\xb3' # 0x00fc -> SUPERSCRIPT THREE
+ '\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ '\u25a0' # 0x00fe -> BLACK SQUARE
+ '\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
+ 0x00a2: 0x00bd, # CENT SIGN
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00a4: 0x00cf, # CURRENCY SIGN
+ 0x00a5: 0x00be, # YEN SIGN
+ 0x00a6: 0x00dd, # BROKEN BAR
+ 0x00a7: 0x00f5, # SECTION SIGN
+ 0x00a8: 0x00f9, # DIAERESIS
+ 0x00a9: 0x00b8, # COPYRIGHT SIGN
+ 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00ad: 0x00f0, # SOFT HYPHEN
+ 0x00ae: 0x00a9, # REGISTERED SIGN
+ 0x00af: 0x00ee, # MACRON
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b3: 0x00fc, # SUPERSCRIPT THREE
+ 0x00b4: 0x00ef, # ACUTE ACCENT
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b6: 0x00f4, # PILCROW SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00b8: 0x00f7, # CEDILLA
+ 0x00b9: 0x00fb, # SUPERSCRIPT ONE
+ 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS
+ 0x00bf: 0x00a8, # INVERTED QUESTION MARK
+ 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE
+ 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
+ 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE
+ 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS
+ 0x00d0: 0x00d1, # LATIN CAPITAL LETTER ETH
+ 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE
+ 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x00d7: 0x009e, # MULTIPLICATION SIGN
+ 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE
+ 0x00de: 0x00e8, # LATIN CAPITAL LETTER THORN
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
+ 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
+ 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE
+ 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
+ 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE
+ 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x00f0: 0x00d0, # LATIN SMALL LETTER ETH
+ 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
+ 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE
+ 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE
+ 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE
+ 0x00fe: 0x00e7, # LATIN SMALL LETTER THORN
+ 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x0131: 0x00d5, # LATIN SMALL LETTER DOTLESS I
+ 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
+ 0x2017: 0x00f2, # DOUBLE LOW LINE
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/dist/lib/encodings/cp852.py b/dist/lib/encodings/cp852.py
new file mode 100644
index 0000000..34d8a0e
--- /dev/null
+++ b/dist/lib/encodings/cp852.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP852.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp852',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x0085: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE
+ 0x0086: 0x0107, # LATIN SMALL LETTER C WITH ACUTE
+ 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE
+ 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x008a: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+ 0x008b: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE
+ 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE
+ 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x008f: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE
+ 0x0092: 0x013a, # LATIN SMALL LETTER L WITH ACUTE
+ 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x0095: 0x013d, # LATIN CAPITAL LETTER L WITH CARON
+ 0x0096: 0x013e, # LATIN SMALL LETTER L WITH CARON
+ 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE
+ 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE
+ 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x0164, # LATIN CAPITAL LETTER T WITH CARON
+ 0x009c: 0x0165, # LATIN SMALL LETTER T WITH CARON
+ 0x009d: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE
+ 0x009e: 0x00d7, # MULTIPLICATION SIGN
+ 0x009f: 0x010d, # LATIN SMALL LETTER C WITH CARON
+ 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK
+ 0x00a5: 0x0105, # LATIN SMALL LETTER A WITH OGONEK
+ 0x00a6: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON
+ 0x00a7: 0x017e, # LATIN SMALL LETTER Z WITH CARON
+ 0x00a8: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK
+ 0x00a9: 0x0119, # LATIN SMALL LETTER E WITH OGONEK
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE
+ 0x00ac: 0x010c, # LATIN CAPITAL LETTER C WITH CARON
+ 0x00ad: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00b7: 0x011a, # LATIN CAPITAL LETTER E WITH CARON
+ 0x00b8: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
+ 0x00be: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE
+ 0x00c7: 0x0103, # LATIN SMALL LETTER A WITH BREVE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x00a4, # CURRENCY SIGN
+ 0x00d0: 0x0111, # LATIN SMALL LETTER D WITH STROKE
+ 0x00d1: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE
+ 0x00d2: 0x010e, # LATIN CAPITAL LETTER D WITH CARON
+ 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x00d4: 0x010f, # LATIN SMALL LETTER D WITH CARON
+ 0x00d5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON
+ 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00d8: 0x011b, # LATIN SMALL LETTER E WITH CARON
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA
+ 0x00de: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
+ 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE
+ 0x00e4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE
+ 0x00e5: 0x0148, # LATIN SMALL LETTER N WITH CARON
+ 0x00e6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON
+ 0x00e7: 0x0161, # LATIN SMALL LETTER S WITH CARON
+ 0x00e8: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE
+ 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00ea: 0x0155, # LATIN SMALL LETTER R WITH ACUTE
+ 0x00eb: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+ 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
+ 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
+ 0x00ee: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA
+ 0x00ef: 0x00b4, # ACUTE ACCENT
+ 0x00f0: 0x00ad, # SOFT HYPHEN
+ 0x00f1: 0x02dd, # DOUBLE ACUTE ACCENT
+ 0x00f2: 0x02db, # OGONEK
+ 0x00f3: 0x02c7, # CARON
+ 0x00f4: 0x02d8, # BREVE
+ 0x00f5: 0x00a7, # SECTION SIGN
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x00b8, # CEDILLA
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x00a8, # DIAERESIS
+ 0x00fa: 0x02d9, # DOT ABOVE
+ 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
+ 0x00fc: 0x0158, # LATIN CAPITAL LETTER R WITH CARON
+ 0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x0000 -> NULL
+ '\x01' # 0x0001 -> START OF HEADING
+ '\x02' # 0x0002 -> START OF TEXT
+ '\x03' # 0x0003 -> END OF TEXT
+ '\x04' # 0x0004 -> END OF TRANSMISSION
+ '\x05' # 0x0005 -> ENQUIRY
+ '\x06' # 0x0006 -> ACKNOWLEDGE
+ '\x07' # 0x0007 -> BELL
+ '\x08' # 0x0008 -> BACKSPACE
+ '\t' # 0x0009 -> HORIZONTAL TABULATION
+ '\n' # 0x000a -> LINE FEED
+ '\x0b' # 0x000b -> VERTICAL TABULATION
+ '\x0c' # 0x000c -> FORM FEED
+ '\r' # 0x000d -> CARRIAGE RETURN
+ '\x0e' # 0x000e -> SHIFT OUT
+ '\x0f' # 0x000f -> SHIFT IN
+ '\x10' # 0x0010 -> DATA LINK ESCAPE
+ '\x11' # 0x0011 -> DEVICE CONTROL ONE
+ '\x12' # 0x0012 -> DEVICE CONTROL TWO
+ '\x13' # 0x0013 -> DEVICE CONTROL THREE
+ '\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ '\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ '\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x0018 -> CANCEL
+ '\x19' # 0x0019 -> END OF MEDIUM
+ '\x1a' # 0x001a -> SUBSTITUTE
+ '\x1b' # 0x001b -> ESCAPE
+ '\x1c' # 0x001c -> FILE SEPARATOR
+ '\x1d' # 0x001d -> GROUP SEPARATOR
+ '\x1e' # 0x001e -> RECORD SEPARATOR
+ '\x1f' # 0x001f -> UNIT SEPARATOR
+ ' ' # 0x0020 -> SPACE
+ '!' # 0x0021 -> EXCLAMATION MARK
+ '"' # 0x0022 -> QUOTATION MARK
+ '#' # 0x0023 -> NUMBER SIGN
+ '$' # 0x0024 -> DOLLAR SIGN
+ '%' # 0x0025 -> PERCENT SIGN
+ '&' # 0x0026 -> AMPERSAND
+ "'" # 0x0027 -> APOSTROPHE
+ '(' # 0x0028 -> LEFT PARENTHESIS
+ ')' # 0x0029 -> RIGHT PARENTHESIS
+ '*' # 0x002a -> ASTERISK
+ '+' # 0x002b -> PLUS SIGN
+ ',' # 0x002c -> COMMA
+ '-' # 0x002d -> HYPHEN-MINUS
+ '.' # 0x002e -> FULL STOP
+ '/' # 0x002f -> SOLIDUS
+ '0' # 0x0030 -> DIGIT ZERO
+ '1' # 0x0031 -> DIGIT ONE
+ '2' # 0x0032 -> DIGIT TWO
+ '3' # 0x0033 -> DIGIT THREE
+ '4' # 0x0034 -> DIGIT FOUR
+ '5' # 0x0035 -> DIGIT FIVE
+ '6' # 0x0036 -> DIGIT SIX
+ '7' # 0x0037 -> DIGIT SEVEN
+ '8' # 0x0038 -> DIGIT EIGHT
+ '9' # 0x0039 -> DIGIT NINE
+ ':' # 0x003a -> COLON
+ ';' # 0x003b -> SEMICOLON
+ '<' # 0x003c -> LESS-THAN SIGN
+ '=' # 0x003d -> EQUALS SIGN
+ '>' # 0x003e -> GREATER-THAN SIGN
+ '?' # 0x003f -> QUESTION MARK
+ '@' # 0x0040 -> COMMERCIAL AT
+ 'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ 'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ 'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ 'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ 'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ 'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ 'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ 'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ 'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ 'J' # 0x004a -> LATIN CAPITAL LETTER J
+ 'K' # 0x004b -> LATIN CAPITAL LETTER K
+ 'L' # 0x004c -> LATIN CAPITAL LETTER L
+ 'M' # 0x004d -> LATIN CAPITAL LETTER M
+ 'N' # 0x004e -> LATIN CAPITAL LETTER N
+ 'O' # 0x004f -> LATIN CAPITAL LETTER O
+ 'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ 'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ 'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ 'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ 'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ 'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ 'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ '[' # 0x005b -> LEFT SQUARE BRACKET
+ '\\' # 0x005c -> REVERSE SOLIDUS
+ ']' # 0x005d -> RIGHT SQUARE BRACKET
+ '^' # 0x005e -> CIRCUMFLEX ACCENT
+ '_' # 0x005f -> LOW LINE
+ '`' # 0x0060 -> GRAVE ACCENT
+ 'a' # 0x0061 -> LATIN SMALL LETTER A
+ 'b' # 0x0062 -> LATIN SMALL LETTER B
+ 'c' # 0x0063 -> LATIN SMALL LETTER C
+ 'd' # 0x0064 -> LATIN SMALL LETTER D
+ 'e' # 0x0065 -> LATIN SMALL LETTER E
+ 'f' # 0x0066 -> LATIN SMALL LETTER F
+ 'g' # 0x0067 -> LATIN SMALL LETTER G
+ 'h' # 0x0068 -> LATIN SMALL LETTER H
+ 'i' # 0x0069 -> LATIN SMALL LETTER I
+ 'j' # 0x006a -> LATIN SMALL LETTER J
+ 'k' # 0x006b -> LATIN SMALL LETTER K
+ 'l' # 0x006c -> LATIN SMALL LETTER L
+ 'm' # 0x006d -> LATIN SMALL LETTER M
+ 'n' # 0x006e -> LATIN SMALL LETTER N
+ 'o' # 0x006f -> LATIN SMALL LETTER O
+ 'p' # 0x0070 -> LATIN SMALL LETTER P
+ 'q' # 0x0071 -> LATIN SMALL LETTER Q
+ 'r' # 0x0072 -> LATIN SMALL LETTER R
+ 's' # 0x0073 -> LATIN SMALL LETTER S
+ 't' # 0x0074 -> LATIN SMALL LETTER T
+ 'u' # 0x0075 -> LATIN SMALL LETTER U
+ 'v' # 0x0076 -> LATIN SMALL LETTER V
+ 'w' # 0x0077 -> LATIN SMALL LETTER W
+ 'x' # 0x0078 -> LATIN SMALL LETTER X
+ 'y' # 0x0079 -> LATIN SMALL LETTER Y
+ 'z' # 0x007a -> LATIN SMALL LETTER Z
+ '{' # 0x007b -> LEFT CURLY BRACKET
+ '|' # 0x007c -> VERTICAL LINE
+ '}' # 0x007d -> RIGHT CURLY BRACKET
+ '~' # 0x007e -> TILDE
+ '\x7f' # 0x007f -> DELETE
+ '\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\u016f' # 0x0085 -> LATIN SMALL LETTER U WITH RING ABOVE
+ '\u0107' # 0x0086 -> LATIN SMALL LETTER C WITH ACUTE
+ '\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\u0142' # 0x0088 -> LATIN SMALL LETTER L WITH STROKE
+ '\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\u0150' # 0x008a -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+ '\u0151' # 0x008b -> LATIN SMALL LETTER O WITH DOUBLE ACUTE
+ '\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE
+ '\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\u0106' # 0x008f -> LATIN CAPITAL LETTER C WITH ACUTE
+ '\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\u0139' # 0x0091 -> LATIN CAPITAL LETTER L WITH ACUTE
+ '\u013a' # 0x0092 -> LATIN SMALL LETTER L WITH ACUTE
+ '\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\u013d' # 0x0095 -> LATIN CAPITAL LETTER L WITH CARON
+ '\u013e' # 0x0096 -> LATIN SMALL LETTER L WITH CARON
+ '\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE
+ '\u015b' # 0x0098 -> LATIN SMALL LETTER S WITH ACUTE
+ '\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\u0164' # 0x009b -> LATIN CAPITAL LETTER T WITH CARON
+ '\u0165' # 0x009c -> LATIN SMALL LETTER T WITH CARON
+ '\u0141' # 0x009d -> LATIN CAPITAL LETTER L WITH STROKE
+ '\xd7' # 0x009e -> MULTIPLICATION SIGN
+ '\u010d' # 0x009f -> LATIN SMALL LETTER C WITH CARON
+ '\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+ '\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ '\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ '\u0104' # 0x00a4 -> LATIN CAPITAL LETTER A WITH OGONEK
+ '\u0105' # 0x00a5 -> LATIN SMALL LETTER A WITH OGONEK
+ '\u017d' # 0x00a6 -> LATIN CAPITAL LETTER Z WITH CARON
+ '\u017e' # 0x00a7 -> LATIN SMALL LETTER Z WITH CARON
+ '\u0118' # 0x00a8 -> LATIN CAPITAL LETTER E WITH OGONEK
+ '\u0119' # 0x00a9 -> LATIN SMALL LETTER E WITH OGONEK
+ '\xac' # 0x00aa -> NOT SIGN
+ '\u017a' # 0x00ab -> LATIN SMALL LETTER Z WITH ACUTE
+ '\u010c' # 0x00ac -> LATIN CAPITAL LETTER C WITH CARON
+ '\u015f' # 0x00ad -> LATIN SMALL LETTER S WITH CEDILLA
+ '\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u2591' # 0x00b0 -> LIGHT SHADE
+ '\u2592' # 0x00b1 -> MEDIUM SHADE
+ '\u2593' # 0x00b2 -> DARK SHADE
+ '\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ '\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ '\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ '\u011a' # 0x00b7 -> LATIN CAPITAL LETTER E WITH CARON
+ '\u015e' # 0x00b8 -> LATIN CAPITAL LETTER S WITH CEDILLA
+ '\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ '\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ '\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ '\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ '\u017b' # 0x00bd -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
+ '\u017c' # 0x00be -> LATIN SMALL LETTER Z WITH DOT ABOVE
+ '\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ '\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ '\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ '\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ '\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ '\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ '\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ '\u0102' # 0x00c6 -> LATIN CAPITAL LETTER A WITH BREVE
+ '\u0103' # 0x00c7 -> LATIN SMALL LETTER A WITH BREVE
+ '\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ '\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ '\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ '\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ '\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ '\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ '\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ '\xa4' # 0x00cf -> CURRENCY SIGN
+ '\u0111' # 0x00d0 -> LATIN SMALL LETTER D WITH STROKE
+ '\u0110' # 0x00d1 -> LATIN CAPITAL LETTER D WITH STROKE
+ '\u010e' # 0x00d2 -> LATIN CAPITAL LETTER D WITH CARON
+ '\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ '\u010f' # 0x00d4 -> LATIN SMALL LETTER D WITH CARON
+ '\u0147' # 0x00d5 -> LATIN CAPITAL LETTER N WITH CARON
+ '\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ '\u011b' # 0x00d8 -> LATIN SMALL LETTER E WITH CARON
+ '\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ '\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ '\u2588' # 0x00db -> FULL BLOCK
+ '\u2584' # 0x00dc -> LOWER HALF BLOCK
+ '\u0162' # 0x00dd -> LATIN CAPITAL LETTER T WITH CEDILLA
+ '\u016e' # 0x00de -> LATIN CAPITAL LETTER U WITH RING ABOVE
+ '\u2580' # 0x00df -> UPPER HALF BLOCK
+ '\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
+ '\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '\u0143' # 0x00e3 -> LATIN CAPITAL LETTER N WITH ACUTE
+ '\u0144' # 0x00e4 -> LATIN SMALL LETTER N WITH ACUTE
+ '\u0148' # 0x00e5 -> LATIN SMALL LETTER N WITH CARON
+ '\u0160' # 0x00e6 -> LATIN CAPITAL LETTER S WITH CARON
+ '\u0161' # 0x00e7 -> LATIN SMALL LETTER S WITH CARON
+ '\u0154' # 0x00e8 -> LATIN CAPITAL LETTER R WITH ACUTE
+ '\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\u0155' # 0x00ea -> LATIN SMALL LETTER R WITH ACUTE
+ '\u0170' # 0x00eb -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+ '\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE
+ '\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE
+ '\u0163' # 0x00ee -> LATIN SMALL LETTER T WITH CEDILLA
+ '\xb4' # 0x00ef -> ACUTE ACCENT
+ '\xad' # 0x00f0 -> SOFT HYPHEN
+ '\u02dd' # 0x00f1 -> DOUBLE ACUTE ACCENT
+ '\u02db' # 0x00f2 -> OGONEK
+ '\u02c7' # 0x00f3 -> CARON
+ '\u02d8' # 0x00f4 -> BREVE
+ '\xa7' # 0x00f5 -> SECTION SIGN
+ '\xf7' # 0x00f6 -> DIVISION SIGN
+ '\xb8' # 0x00f7 -> CEDILLA
+ '\xb0' # 0x00f8 -> DEGREE SIGN
+ '\xa8' # 0x00f9 -> DIAERESIS
+ '\u02d9' # 0x00fa -> DOT ABOVE
+ '\u0171' # 0x00fb -> LATIN SMALL LETTER U WITH DOUBLE ACUTE
+ '\u0158' # 0x00fc -> LATIN CAPITAL LETTER R WITH CARON
+ '\u0159' # 0x00fd -> LATIN SMALL LETTER R WITH CARON
+ '\u25a0' # 0x00fe -> BLACK SQUARE
+ '\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a4: 0x00cf, # CURRENCY SIGN
+ 0x00a7: 0x00f5, # SECTION SIGN
+ 0x00a8: 0x00f9, # DIAERESIS
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00ad: 0x00f0, # SOFT HYPHEN
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b4: 0x00ef, # ACUTE ACCENT
+ 0x00b8: 0x00f7, # CEDILLA
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x00d7: 0x009e, # MULTIPLICATION SIGN
+ 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
+ 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE
+ 0x0102: 0x00c6, # LATIN CAPITAL LETTER A WITH BREVE
+ 0x0103: 0x00c7, # LATIN SMALL LETTER A WITH BREVE
+ 0x0104: 0x00a4, # LATIN CAPITAL LETTER A WITH OGONEK
+ 0x0105: 0x00a5, # LATIN SMALL LETTER A WITH OGONEK
+ 0x0106: 0x008f, # LATIN CAPITAL LETTER C WITH ACUTE
+ 0x0107: 0x0086, # LATIN SMALL LETTER C WITH ACUTE
+ 0x010c: 0x00ac, # LATIN CAPITAL LETTER C WITH CARON
+ 0x010d: 0x009f, # LATIN SMALL LETTER C WITH CARON
+ 0x010e: 0x00d2, # LATIN CAPITAL LETTER D WITH CARON
+ 0x010f: 0x00d4, # LATIN SMALL LETTER D WITH CARON
+ 0x0110: 0x00d1, # LATIN CAPITAL LETTER D WITH STROKE
+ 0x0111: 0x00d0, # LATIN SMALL LETTER D WITH STROKE
+ 0x0118: 0x00a8, # LATIN CAPITAL LETTER E WITH OGONEK
+ 0x0119: 0x00a9, # LATIN SMALL LETTER E WITH OGONEK
+ 0x011a: 0x00b7, # LATIN CAPITAL LETTER E WITH CARON
+ 0x011b: 0x00d8, # LATIN SMALL LETTER E WITH CARON
+ 0x0139: 0x0091, # LATIN CAPITAL LETTER L WITH ACUTE
+ 0x013a: 0x0092, # LATIN SMALL LETTER L WITH ACUTE
+ 0x013d: 0x0095, # LATIN CAPITAL LETTER L WITH CARON
+ 0x013e: 0x0096, # LATIN SMALL LETTER L WITH CARON
+ 0x0141: 0x009d, # LATIN CAPITAL LETTER L WITH STROKE
+ 0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE
+ 0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE
+ 0x0144: 0x00e4, # LATIN SMALL LETTER N WITH ACUTE
+ 0x0147: 0x00d5, # LATIN CAPITAL LETTER N WITH CARON
+ 0x0148: 0x00e5, # LATIN SMALL LETTER N WITH CARON
+ 0x0150: 0x008a, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+ 0x0151: 0x008b, # LATIN SMALL LETTER O WITH DOUBLE ACUTE
+ 0x0154: 0x00e8, # LATIN CAPITAL LETTER R WITH ACUTE
+ 0x0155: 0x00ea, # LATIN SMALL LETTER R WITH ACUTE
+ 0x0158: 0x00fc, # LATIN CAPITAL LETTER R WITH CARON
+ 0x0159: 0x00fd, # LATIN SMALL LETTER R WITH CARON
+ 0x015a: 0x0097, # LATIN CAPITAL LETTER S WITH ACUTE
+ 0x015b: 0x0098, # LATIN SMALL LETTER S WITH ACUTE
+ 0x015e: 0x00b8, # LATIN CAPITAL LETTER S WITH CEDILLA
+ 0x015f: 0x00ad, # LATIN SMALL LETTER S WITH CEDILLA
+ 0x0160: 0x00e6, # LATIN CAPITAL LETTER S WITH CARON
+ 0x0161: 0x00e7, # LATIN SMALL LETTER S WITH CARON
+ 0x0162: 0x00dd, # LATIN CAPITAL LETTER T WITH CEDILLA
+ 0x0163: 0x00ee, # LATIN SMALL LETTER T WITH CEDILLA
+ 0x0164: 0x009b, # LATIN CAPITAL LETTER T WITH CARON
+ 0x0165: 0x009c, # LATIN SMALL LETTER T WITH CARON
+ 0x016e: 0x00de, # LATIN CAPITAL LETTER U WITH RING ABOVE
+ 0x016f: 0x0085, # LATIN SMALL LETTER U WITH RING ABOVE
+ 0x0170: 0x00eb, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+ 0x0171: 0x00fb, # LATIN SMALL LETTER U WITH DOUBLE ACUTE
+ 0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE
+ 0x017a: 0x00ab, # LATIN SMALL LETTER Z WITH ACUTE
+ 0x017b: 0x00bd, # LATIN CAPITAL LETTER Z WITH DOT ABOVE
+ 0x017c: 0x00be, # LATIN SMALL LETTER Z WITH DOT ABOVE
+ 0x017d: 0x00a6, # LATIN CAPITAL LETTER Z WITH CARON
+ 0x017e: 0x00a7, # LATIN SMALL LETTER Z WITH CARON
+ 0x02c7: 0x00f3, # CARON
+ 0x02d8: 0x00f4, # BREVE
+ 0x02d9: 0x00fa, # DOT ABOVE
+ 0x02db: 0x00f2, # OGONEK
+ 0x02dd: 0x00f1, # DOUBLE ACUTE ACCENT
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/dist/lib/encodings/cp855.py b/dist/lib/encodings/cp855.py
new file mode 100644
index 0000000..4fe9210
--- /dev/null
+++ b/dist/lib/encodings/cp855.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP855.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp855',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE
+ 0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE
+ 0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE
+ 0x0083: 0x0403, # CYRILLIC CAPITAL LETTER GJE
+ 0x0084: 0x0451, # CYRILLIC SMALL LETTER IO
+ 0x0085: 0x0401, # CYRILLIC CAPITAL LETTER IO
+ 0x0086: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
+ 0x0087: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ 0x0088: 0x0455, # CYRILLIC SMALL LETTER DZE
+ 0x0089: 0x0405, # CYRILLIC CAPITAL LETTER DZE
+ 0x008a: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+ 0x008b: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+ 0x008c: 0x0457, # CYRILLIC SMALL LETTER YI
+ 0x008d: 0x0407, # CYRILLIC CAPITAL LETTER YI
+ 0x008e: 0x0458, # CYRILLIC SMALL LETTER JE
+ 0x008f: 0x0408, # CYRILLIC CAPITAL LETTER JE
+ 0x0090: 0x0459, # CYRILLIC SMALL LETTER LJE
+ 0x0091: 0x0409, # CYRILLIC CAPITAL LETTER LJE
+ 0x0092: 0x045a, # CYRILLIC SMALL LETTER NJE
+ 0x0093: 0x040a, # CYRILLIC CAPITAL LETTER NJE
+ 0x0094: 0x045b, # CYRILLIC SMALL LETTER TSHE
+ 0x0095: 0x040b, # CYRILLIC CAPITAL LETTER TSHE
+ 0x0096: 0x045c, # CYRILLIC SMALL LETTER KJE
+ 0x0097: 0x040c, # CYRILLIC CAPITAL LETTER KJE
+ 0x0098: 0x045e, # CYRILLIC SMALL LETTER SHORT U
+ 0x0099: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U
+ 0x009a: 0x045f, # CYRILLIC SMALL LETTER DZHE
+ 0x009b: 0x040f, # CYRILLIC CAPITAL LETTER DZHE
+ 0x009c: 0x044e, # CYRILLIC SMALL LETTER YU
+ 0x009d: 0x042e, # CYRILLIC CAPITAL LETTER YU
+ 0x009e: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN
+ 0x009f: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
+ 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A
+ 0x00a1: 0x0410, # CYRILLIC CAPITAL LETTER A
+ 0x00a2: 0x0431, # CYRILLIC SMALL LETTER BE
+ 0x00a3: 0x0411, # CYRILLIC CAPITAL LETTER BE
+ 0x00a4: 0x0446, # CYRILLIC SMALL LETTER TSE
+ 0x00a5: 0x0426, # CYRILLIC CAPITAL LETTER TSE
+ 0x00a6: 0x0434, # CYRILLIC SMALL LETTER DE
+ 0x00a7: 0x0414, # CYRILLIC CAPITAL LETTER DE
+ 0x00a8: 0x0435, # CYRILLIC SMALL LETTER IE
+ 0x00a9: 0x0415, # CYRILLIC CAPITAL LETTER IE
+ 0x00aa: 0x0444, # CYRILLIC SMALL LETTER EF
+ 0x00ab: 0x0424, # CYRILLIC CAPITAL LETTER EF
+ 0x00ac: 0x0433, # CYRILLIC SMALL LETTER GHE
+ 0x00ad: 0x0413, # CYRILLIC CAPITAL LETTER GHE
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x0445, # CYRILLIC SMALL LETTER HA
+ 0x00b6: 0x0425, # CYRILLIC CAPITAL LETTER HA
+ 0x00b7: 0x0438, # CYRILLIC SMALL LETTER I
+ 0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x0439, # CYRILLIC SMALL LETTER SHORT I
+ 0x00be: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x043a, # CYRILLIC SMALL LETTER KA
+ 0x00c7: 0x041a, # CYRILLIC CAPITAL LETTER KA
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x00a4, # CURRENCY SIGN
+ 0x00d0: 0x043b, # CYRILLIC SMALL LETTER EL
+ 0x00d1: 0x041b, # CYRILLIC CAPITAL LETTER EL
+ 0x00d2: 0x043c, # CYRILLIC SMALL LETTER EM
+ 0x00d3: 0x041c, # CYRILLIC CAPITAL LETTER EM
+ 0x00d4: 0x043d, # CYRILLIC SMALL LETTER EN
+ 0x00d5: 0x041d, # CYRILLIC CAPITAL LETTER EN
+ 0x00d6: 0x043e, # CYRILLIC SMALL LETTER O
+ 0x00d7: 0x041e, # CYRILLIC CAPITAL LETTER O
+ 0x00d8: 0x043f, # CYRILLIC SMALL LETTER PE
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x041f, # CYRILLIC CAPITAL LETTER PE
+ 0x00de: 0x044f, # CYRILLIC SMALL LETTER YA
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x042f, # CYRILLIC CAPITAL LETTER YA
+ 0x00e1: 0x0440, # CYRILLIC SMALL LETTER ER
+ 0x00e2: 0x0420, # CYRILLIC CAPITAL LETTER ER
+ 0x00e3: 0x0441, # CYRILLIC SMALL LETTER ES
+ 0x00e4: 0x0421, # CYRILLIC CAPITAL LETTER ES
+ 0x00e5: 0x0442, # CYRILLIC SMALL LETTER TE
+ 0x00e6: 0x0422, # CYRILLIC CAPITAL LETTER TE
+ 0x00e7: 0x0443, # CYRILLIC SMALL LETTER U
+ 0x00e8: 0x0423, # CYRILLIC CAPITAL LETTER U
+ 0x00e9: 0x0436, # CYRILLIC SMALL LETTER ZHE
+ 0x00ea: 0x0416, # CYRILLIC CAPITAL LETTER ZHE
+ 0x00eb: 0x0432, # CYRILLIC SMALL LETTER VE
+ 0x00ec: 0x0412, # CYRILLIC CAPITAL LETTER VE
+ 0x00ed: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN
+ 0x00ee: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN
+ 0x00ef: 0x2116, # NUMERO SIGN
+ 0x00f0: 0x00ad, # SOFT HYPHEN
+ 0x00f1: 0x044b, # CYRILLIC SMALL LETTER YERU
+ 0x00f2: 0x042b, # CYRILLIC CAPITAL LETTER YERU
+ 0x00f3: 0x0437, # CYRILLIC SMALL LETTER ZE
+ 0x00f4: 0x0417, # CYRILLIC CAPITAL LETTER ZE
+ 0x00f5: 0x0448, # CYRILLIC SMALL LETTER SHA
+ 0x00f6: 0x0428, # CYRILLIC CAPITAL LETTER SHA
+ 0x00f7: 0x044d, # CYRILLIC SMALL LETTER E
+ 0x00f8: 0x042d, # CYRILLIC CAPITAL LETTER E
+ 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA
+ 0x00fa: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
+ 0x00fb: 0x0447, # CYRILLIC SMALL LETTER CHE
+ 0x00fc: 0x0427, # CYRILLIC CAPITAL LETTER CHE
+ 0x00fd: 0x00a7, # SECTION SIGN
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x0000 -> NULL
+ '\x01' # 0x0001 -> START OF HEADING
+ '\x02' # 0x0002 -> START OF TEXT
+ '\x03' # 0x0003 -> END OF TEXT
+ '\x04' # 0x0004 -> END OF TRANSMISSION
+ '\x05' # 0x0005 -> ENQUIRY
+ '\x06' # 0x0006 -> ACKNOWLEDGE
+ '\x07' # 0x0007 -> BELL
+ '\x08' # 0x0008 -> BACKSPACE
+ '\t' # 0x0009 -> HORIZONTAL TABULATION
+ '\n' # 0x000a -> LINE FEED
+ '\x0b' # 0x000b -> VERTICAL TABULATION
+ '\x0c' # 0x000c -> FORM FEED
+ '\r' # 0x000d -> CARRIAGE RETURN
+ '\x0e' # 0x000e -> SHIFT OUT
+ '\x0f' # 0x000f -> SHIFT IN
+ '\x10' # 0x0010 -> DATA LINK ESCAPE
+ '\x11' # 0x0011 -> DEVICE CONTROL ONE
+ '\x12' # 0x0012 -> DEVICE CONTROL TWO
+ '\x13' # 0x0013 -> DEVICE CONTROL THREE
+ '\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ '\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ '\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x0018 -> CANCEL
+ '\x19' # 0x0019 -> END OF MEDIUM
+ '\x1a' # 0x001a -> SUBSTITUTE
+ '\x1b' # 0x001b -> ESCAPE
+ '\x1c' # 0x001c -> FILE SEPARATOR
+ '\x1d' # 0x001d -> GROUP SEPARATOR
+ '\x1e' # 0x001e -> RECORD SEPARATOR
+ '\x1f' # 0x001f -> UNIT SEPARATOR
+ ' ' # 0x0020 -> SPACE
+ '!' # 0x0021 -> EXCLAMATION MARK
+ '"' # 0x0022 -> QUOTATION MARK
+ '#' # 0x0023 -> NUMBER SIGN
+ '$' # 0x0024 -> DOLLAR SIGN
+ '%' # 0x0025 -> PERCENT SIGN
+ '&' # 0x0026 -> AMPERSAND
+ "'" # 0x0027 -> APOSTROPHE
+ '(' # 0x0028 -> LEFT PARENTHESIS
+ ')' # 0x0029 -> RIGHT PARENTHESIS
+ '*' # 0x002a -> ASTERISK
+ '+' # 0x002b -> PLUS SIGN
+ ',' # 0x002c -> COMMA
+ '-' # 0x002d -> HYPHEN-MINUS
+ '.' # 0x002e -> FULL STOP
+ '/' # 0x002f -> SOLIDUS
+ '0' # 0x0030 -> DIGIT ZERO
+ '1' # 0x0031 -> DIGIT ONE
+ '2' # 0x0032 -> DIGIT TWO
+ '3' # 0x0033 -> DIGIT THREE
+ '4' # 0x0034 -> DIGIT FOUR
+ '5' # 0x0035 -> DIGIT FIVE
+ '6' # 0x0036 -> DIGIT SIX
+ '7' # 0x0037 -> DIGIT SEVEN
+ '8' # 0x0038 -> DIGIT EIGHT
+ '9' # 0x0039 -> DIGIT NINE
+ ':' # 0x003a -> COLON
+ ';' # 0x003b -> SEMICOLON
+ '<' # 0x003c -> LESS-THAN SIGN
+ '=' # 0x003d -> EQUALS SIGN
+ '>' # 0x003e -> GREATER-THAN SIGN
+ '?' # 0x003f -> QUESTION MARK
+ '@' # 0x0040 -> COMMERCIAL AT
+ 'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ 'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ 'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ 'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ 'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ 'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ 'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ 'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ 'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ 'J' # 0x004a -> LATIN CAPITAL LETTER J
+ 'K' # 0x004b -> LATIN CAPITAL LETTER K
+ 'L' # 0x004c -> LATIN CAPITAL LETTER L
+ 'M' # 0x004d -> LATIN CAPITAL LETTER M
+ 'N' # 0x004e -> LATIN CAPITAL LETTER N
+ 'O' # 0x004f -> LATIN CAPITAL LETTER O
+ 'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ 'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ 'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ 'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ 'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ 'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ 'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ '[' # 0x005b -> LEFT SQUARE BRACKET
+ '\\' # 0x005c -> REVERSE SOLIDUS
+ ']' # 0x005d -> RIGHT SQUARE BRACKET
+ '^' # 0x005e -> CIRCUMFLEX ACCENT
+ '_' # 0x005f -> LOW LINE
+ '`' # 0x0060 -> GRAVE ACCENT
+ 'a' # 0x0061 -> LATIN SMALL LETTER A
+ 'b' # 0x0062 -> LATIN SMALL LETTER B
+ 'c' # 0x0063 -> LATIN SMALL LETTER C
+ 'd' # 0x0064 -> LATIN SMALL LETTER D
+ 'e' # 0x0065 -> LATIN SMALL LETTER E
+ 'f' # 0x0066 -> LATIN SMALL LETTER F
+ 'g' # 0x0067 -> LATIN SMALL LETTER G
+ 'h' # 0x0068 -> LATIN SMALL LETTER H
+ 'i' # 0x0069 -> LATIN SMALL LETTER I
+ 'j' # 0x006a -> LATIN SMALL LETTER J
+ 'k' # 0x006b -> LATIN SMALL LETTER K
+ 'l' # 0x006c -> LATIN SMALL LETTER L
+ 'm' # 0x006d -> LATIN SMALL LETTER M
+ 'n' # 0x006e -> LATIN SMALL LETTER N
+ 'o' # 0x006f -> LATIN SMALL LETTER O
+ 'p' # 0x0070 -> LATIN SMALL LETTER P
+ 'q' # 0x0071 -> LATIN SMALL LETTER Q
+ 'r' # 0x0072 -> LATIN SMALL LETTER R
+ 's' # 0x0073 -> LATIN SMALL LETTER S
+ 't' # 0x0074 -> LATIN SMALL LETTER T
+ 'u' # 0x0075 -> LATIN SMALL LETTER U
+ 'v' # 0x0076 -> LATIN SMALL LETTER V
+ 'w' # 0x0077 -> LATIN SMALL LETTER W
+ 'x' # 0x0078 -> LATIN SMALL LETTER X
+ 'y' # 0x0079 -> LATIN SMALL LETTER Y
+ 'z' # 0x007a -> LATIN SMALL LETTER Z
+ '{' # 0x007b -> LEFT CURLY BRACKET
+ '|' # 0x007c -> VERTICAL LINE
+ '}' # 0x007d -> RIGHT CURLY BRACKET
+ '~' # 0x007e -> TILDE
+ '\x7f' # 0x007f -> DELETE
+ '\u0452' # 0x0080 -> CYRILLIC SMALL LETTER DJE
+ '\u0402' # 0x0081 -> CYRILLIC CAPITAL LETTER DJE
+ '\u0453' # 0x0082 -> CYRILLIC SMALL LETTER GJE
+ '\u0403' # 0x0083 -> CYRILLIC CAPITAL LETTER GJE
+ '\u0451' # 0x0084 -> CYRILLIC SMALL LETTER IO
+ '\u0401' # 0x0085 -> CYRILLIC CAPITAL LETTER IO
+ '\u0454' # 0x0086 -> CYRILLIC SMALL LETTER UKRAINIAN IE
+ '\u0404' # 0x0087 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ '\u0455' # 0x0088 -> CYRILLIC SMALL LETTER DZE
+ '\u0405' # 0x0089 -> CYRILLIC CAPITAL LETTER DZE
+ '\u0456' # 0x008a -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+ '\u0406' # 0x008b -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+ '\u0457' # 0x008c -> CYRILLIC SMALL LETTER YI
+ '\u0407' # 0x008d -> CYRILLIC CAPITAL LETTER YI
+ '\u0458' # 0x008e -> CYRILLIC SMALL LETTER JE
+ '\u0408' # 0x008f -> CYRILLIC CAPITAL LETTER JE
+ '\u0459' # 0x0090 -> CYRILLIC SMALL LETTER LJE
+ '\u0409' # 0x0091 -> CYRILLIC CAPITAL LETTER LJE
+ '\u045a' # 0x0092 -> CYRILLIC SMALL LETTER NJE
+ '\u040a' # 0x0093 -> CYRILLIC CAPITAL LETTER NJE
+ '\u045b' # 0x0094 -> CYRILLIC SMALL LETTER TSHE
+ '\u040b' # 0x0095 -> CYRILLIC CAPITAL LETTER TSHE
+ '\u045c' # 0x0096 -> CYRILLIC SMALL LETTER KJE
+ '\u040c' # 0x0097 -> CYRILLIC CAPITAL LETTER KJE
+ '\u045e' # 0x0098 -> CYRILLIC SMALL LETTER SHORT U
+ '\u040e' # 0x0099 -> CYRILLIC CAPITAL LETTER SHORT U
+ '\u045f' # 0x009a -> CYRILLIC SMALL LETTER DZHE
+ '\u040f' # 0x009b -> CYRILLIC CAPITAL LETTER DZHE
+ '\u044e' # 0x009c -> CYRILLIC SMALL LETTER YU
+ '\u042e' # 0x009d -> CYRILLIC CAPITAL LETTER YU
+ '\u044a' # 0x009e -> CYRILLIC SMALL LETTER HARD SIGN
+ '\u042a' # 0x009f -> CYRILLIC CAPITAL LETTER HARD SIGN
+ '\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A
+ '\u0410' # 0x00a1 -> CYRILLIC CAPITAL LETTER A
+ '\u0431' # 0x00a2 -> CYRILLIC SMALL LETTER BE
+ '\u0411' # 0x00a3 -> CYRILLIC CAPITAL LETTER BE
+ '\u0446' # 0x00a4 -> CYRILLIC SMALL LETTER TSE
+ '\u0426' # 0x00a5 -> CYRILLIC CAPITAL LETTER TSE
+ '\u0434' # 0x00a6 -> CYRILLIC SMALL LETTER DE
+ '\u0414' # 0x00a7 -> CYRILLIC CAPITAL LETTER DE
+ '\u0435' # 0x00a8 -> CYRILLIC SMALL LETTER IE
+ '\u0415' # 0x00a9 -> CYRILLIC CAPITAL LETTER IE
+ '\u0444' # 0x00aa -> CYRILLIC SMALL LETTER EF
+ '\u0424' # 0x00ab -> CYRILLIC CAPITAL LETTER EF
+ '\u0433' # 0x00ac -> CYRILLIC SMALL LETTER GHE
+ '\u0413' # 0x00ad -> CYRILLIC CAPITAL LETTER GHE
+ '\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u2591' # 0x00b0 -> LIGHT SHADE
+ '\u2592' # 0x00b1 -> MEDIUM SHADE
+ '\u2593' # 0x00b2 -> DARK SHADE
+ '\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ '\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ '\u0445' # 0x00b5 -> CYRILLIC SMALL LETTER HA
+ '\u0425' # 0x00b6 -> CYRILLIC CAPITAL LETTER HA
+ '\u0438' # 0x00b7 -> CYRILLIC SMALL LETTER I
+ '\u0418' # 0x00b8 -> CYRILLIC CAPITAL LETTER I
+ '\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ '\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ '\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ '\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ '\u0439' # 0x00bd -> CYRILLIC SMALL LETTER SHORT I
+ '\u0419' # 0x00be -> CYRILLIC CAPITAL LETTER SHORT I
+ '\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ '\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ '\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ '\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ '\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ '\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ '\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ '\u043a' # 0x00c6 -> CYRILLIC SMALL LETTER KA
+ '\u041a' # 0x00c7 -> CYRILLIC CAPITAL LETTER KA
+ '\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ '\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ '\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ '\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ '\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ '\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ '\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ '\xa4' # 0x00cf -> CURRENCY SIGN
+ '\u043b' # 0x00d0 -> CYRILLIC SMALL LETTER EL
+ '\u041b' # 0x00d1 -> CYRILLIC CAPITAL LETTER EL
+ '\u043c' # 0x00d2 -> CYRILLIC SMALL LETTER EM
+ '\u041c' # 0x00d3 -> CYRILLIC CAPITAL LETTER EM
+ '\u043d' # 0x00d4 -> CYRILLIC SMALL LETTER EN
+ '\u041d' # 0x00d5 -> CYRILLIC CAPITAL LETTER EN
+ '\u043e' # 0x00d6 -> CYRILLIC SMALL LETTER O
+ '\u041e' # 0x00d7 -> CYRILLIC CAPITAL LETTER O
+ '\u043f' # 0x00d8 -> CYRILLIC SMALL LETTER PE
+ '\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ '\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ '\u2588' # 0x00db -> FULL BLOCK
+ '\u2584' # 0x00dc -> LOWER HALF BLOCK
+ '\u041f' # 0x00dd -> CYRILLIC CAPITAL LETTER PE
+ '\u044f' # 0x00de -> CYRILLIC SMALL LETTER YA
+ '\u2580' # 0x00df -> UPPER HALF BLOCK
+ '\u042f' # 0x00e0 -> CYRILLIC CAPITAL LETTER YA
+ '\u0440' # 0x00e1 -> CYRILLIC SMALL LETTER ER
+ '\u0420' # 0x00e2 -> CYRILLIC CAPITAL LETTER ER
+ '\u0441' # 0x00e3 -> CYRILLIC SMALL LETTER ES
+ '\u0421' # 0x00e4 -> CYRILLIC CAPITAL LETTER ES
+ '\u0442' # 0x00e5 -> CYRILLIC SMALL LETTER TE
+ '\u0422' # 0x00e6 -> CYRILLIC CAPITAL LETTER TE
+ '\u0443' # 0x00e7 -> CYRILLIC SMALL LETTER U
+ '\u0423' # 0x00e8 -> CYRILLIC CAPITAL LETTER U
+ '\u0436' # 0x00e9 -> CYRILLIC SMALL LETTER ZHE
+ '\u0416' # 0x00ea -> CYRILLIC CAPITAL LETTER ZHE
+ '\u0432' # 0x00eb -> CYRILLIC SMALL LETTER VE
+ '\u0412' # 0x00ec -> CYRILLIC CAPITAL LETTER VE
+ '\u044c' # 0x00ed -> CYRILLIC SMALL LETTER SOFT SIGN
+ '\u042c' # 0x00ee -> CYRILLIC CAPITAL LETTER SOFT SIGN
+ '\u2116' # 0x00ef -> NUMERO SIGN
+ '\xad' # 0x00f0 -> SOFT HYPHEN
+ '\u044b' # 0x00f1 -> CYRILLIC SMALL LETTER YERU
+ '\u042b' # 0x00f2 -> CYRILLIC CAPITAL LETTER YERU
+ '\u0437' # 0x00f3 -> CYRILLIC SMALL LETTER ZE
+ '\u0417' # 0x00f4 -> CYRILLIC CAPITAL LETTER ZE
+ '\u0448' # 0x00f5 -> CYRILLIC SMALL LETTER SHA
+ '\u0428' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHA
+ '\u044d' # 0x00f7 -> CYRILLIC SMALL LETTER E
+ '\u042d' # 0x00f8 -> CYRILLIC CAPITAL LETTER E
+ '\u0449' # 0x00f9 -> CYRILLIC SMALL LETTER SHCHA
+ '\u0429' # 0x00fa -> CYRILLIC CAPITAL LETTER SHCHA
+ '\u0447' # 0x00fb -> CYRILLIC SMALL LETTER CHE
+ '\u0427' # 0x00fc -> CYRILLIC CAPITAL LETTER CHE
+ '\xa7' # 0x00fd -> SECTION SIGN
+ '\u25a0' # 0x00fe -> BLACK SQUARE
+ '\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a4: 0x00cf, # CURRENCY SIGN
+ 0x00a7: 0x00fd, # SECTION SIGN
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ad: 0x00f0, # SOFT HYPHEN
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x0401: 0x0085, # CYRILLIC CAPITAL LETTER IO
+ 0x0402: 0x0081, # CYRILLIC CAPITAL LETTER DJE
+ 0x0403: 0x0083, # CYRILLIC CAPITAL LETTER GJE
+ 0x0404: 0x0087, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ 0x0405: 0x0089, # CYRILLIC CAPITAL LETTER DZE
+ 0x0406: 0x008b, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+ 0x0407: 0x008d, # CYRILLIC CAPITAL LETTER YI
+ 0x0408: 0x008f, # CYRILLIC CAPITAL LETTER JE
+ 0x0409: 0x0091, # CYRILLIC CAPITAL LETTER LJE
+ 0x040a: 0x0093, # CYRILLIC CAPITAL LETTER NJE
+ 0x040b: 0x0095, # CYRILLIC CAPITAL LETTER TSHE
+ 0x040c: 0x0097, # CYRILLIC CAPITAL LETTER KJE
+ 0x040e: 0x0099, # CYRILLIC CAPITAL LETTER SHORT U
+ 0x040f: 0x009b, # CYRILLIC CAPITAL LETTER DZHE
+ 0x0410: 0x00a1, # CYRILLIC CAPITAL LETTER A
+ 0x0411: 0x00a3, # CYRILLIC CAPITAL LETTER BE
+ 0x0412: 0x00ec, # CYRILLIC CAPITAL LETTER VE
+ 0x0413: 0x00ad, # CYRILLIC CAPITAL LETTER GHE
+ 0x0414: 0x00a7, # CYRILLIC CAPITAL LETTER DE
+ 0x0415: 0x00a9, # CYRILLIC CAPITAL LETTER IE
+ 0x0416: 0x00ea, # CYRILLIC CAPITAL LETTER ZHE
+ 0x0417: 0x00f4, # CYRILLIC CAPITAL LETTER ZE
+ 0x0418: 0x00b8, # CYRILLIC CAPITAL LETTER I
+ 0x0419: 0x00be, # CYRILLIC CAPITAL LETTER SHORT I
+ 0x041a: 0x00c7, # CYRILLIC CAPITAL LETTER KA
+ 0x041b: 0x00d1, # CYRILLIC CAPITAL LETTER EL
+ 0x041c: 0x00d3, # CYRILLIC CAPITAL LETTER EM
+ 0x041d: 0x00d5, # CYRILLIC CAPITAL LETTER EN
+ 0x041e: 0x00d7, # CYRILLIC CAPITAL LETTER O
+ 0x041f: 0x00dd, # CYRILLIC CAPITAL LETTER PE
+ 0x0420: 0x00e2, # CYRILLIC CAPITAL LETTER ER
+ 0x0421: 0x00e4, # CYRILLIC CAPITAL LETTER ES
+ 0x0422: 0x00e6, # CYRILLIC CAPITAL LETTER TE
+ 0x0423: 0x00e8, # CYRILLIC CAPITAL LETTER U
+ 0x0424: 0x00ab, # CYRILLIC CAPITAL LETTER EF
+ 0x0425: 0x00b6, # CYRILLIC CAPITAL LETTER HA
+ 0x0426: 0x00a5, # CYRILLIC CAPITAL LETTER TSE
+ 0x0427: 0x00fc, # CYRILLIC CAPITAL LETTER CHE
+ 0x0428: 0x00f6, # CYRILLIC CAPITAL LETTER SHA
+ 0x0429: 0x00fa, # CYRILLIC CAPITAL LETTER SHCHA
+ 0x042a: 0x009f, # CYRILLIC CAPITAL LETTER HARD SIGN
+ 0x042b: 0x00f2, # CYRILLIC CAPITAL LETTER YERU
+ 0x042c: 0x00ee, # CYRILLIC CAPITAL LETTER SOFT SIGN
+ 0x042d: 0x00f8, # CYRILLIC CAPITAL LETTER E
+ 0x042e: 0x009d, # CYRILLIC CAPITAL LETTER YU
+ 0x042f: 0x00e0, # CYRILLIC CAPITAL LETTER YA
+ 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A
+ 0x0431: 0x00a2, # CYRILLIC SMALL LETTER BE
+ 0x0432: 0x00eb, # CYRILLIC SMALL LETTER VE
+ 0x0433: 0x00ac, # CYRILLIC SMALL LETTER GHE
+ 0x0434: 0x00a6, # CYRILLIC SMALL LETTER DE
+ 0x0435: 0x00a8, # CYRILLIC SMALL LETTER IE
+ 0x0436: 0x00e9, # CYRILLIC SMALL LETTER ZHE
+ 0x0437: 0x00f3, # CYRILLIC SMALL LETTER ZE
+ 0x0438: 0x00b7, # CYRILLIC SMALL LETTER I
+ 0x0439: 0x00bd, # CYRILLIC SMALL LETTER SHORT I
+ 0x043a: 0x00c6, # CYRILLIC SMALL LETTER KA
+ 0x043b: 0x00d0, # CYRILLIC SMALL LETTER EL
+ 0x043c: 0x00d2, # CYRILLIC SMALL LETTER EM
+ 0x043d: 0x00d4, # CYRILLIC SMALL LETTER EN
+ 0x043e: 0x00d6, # CYRILLIC SMALL LETTER O
+ 0x043f: 0x00d8, # CYRILLIC SMALL LETTER PE
+ 0x0440: 0x00e1, # CYRILLIC SMALL LETTER ER
+ 0x0441: 0x00e3, # CYRILLIC SMALL LETTER ES
+ 0x0442: 0x00e5, # CYRILLIC SMALL LETTER TE
+ 0x0443: 0x00e7, # CYRILLIC SMALL LETTER U
+ 0x0444: 0x00aa, # CYRILLIC SMALL LETTER EF
+ 0x0445: 0x00b5, # CYRILLIC SMALL LETTER HA
+ 0x0446: 0x00a4, # CYRILLIC SMALL LETTER TSE
+ 0x0447: 0x00fb, # CYRILLIC SMALL LETTER CHE
+ 0x0448: 0x00f5, # CYRILLIC SMALL LETTER SHA
+ 0x0449: 0x00f9, # CYRILLIC SMALL LETTER SHCHA
+ 0x044a: 0x009e, # CYRILLIC SMALL LETTER HARD SIGN
+ 0x044b: 0x00f1, # CYRILLIC SMALL LETTER YERU
+ 0x044c: 0x00ed, # CYRILLIC SMALL LETTER SOFT SIGN
+ 0x044d: 0x00f7, # CYRILLIC SMALL LETTER E
+ 0x044e: 0x009c, # CYRILLIC SMALL LETTER YU
+ 0x044f: 0x00de, # CYRILLIC SMALL LETTER YA
+ 0x0451: 0x0084, # CYRILLIC SMALL LETTER IO
+ 0x0452: 0x0080, # CYRILLIC SMALL LETTER DJE
+ 0x0453: 0x0082, # CYRILLIC SMALL LETTER GJE
+ 0x0454: 0x0086, # CYRILLIC SMALL LETTER UKRAINIAN IE
+ 0x0455: 0x0088, # CYRILLIC SMALL LETTER DZE
+ 0x0456: 0x008a, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+ 0x0457: 0x008c, # CYRILLIC SMALL LETTER YI
+ 0x0458: 0x008e, # CYRILLIC SMALL LETTER JE
+ 0x0459: 0x0090, # CYRILLIC SMALL LETTER LJE
+ 0x045a: 0x0092, # CYRILLIC SMALL LETTER NJE
+ 0x045b: 0x0094, # CYRILLIC SMALL LETTER TSHE
+ 0x045c: 0x0096, # CYRILLIC SMALL LETTER KJE
+ 0x045e: 0x0098, # CYRILLIC SMALL LETTER SHORT U
+ 0x045f: 0x009a, # CYRILLIC SMALL LETTER DZHE
+ 0x2116: 0x00ef, # NUMERO SIGN
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/dist/lib/encodings/cp856.py b/dist/lib/encodings/cp856.py
new file mode 100644
index 0000000..cacbfb2
--- /dev/null
+++ b/dist/lib/encodings/cp856.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp856 generated from 'MAPPINGS/VENDORS/MISC/CP856.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp856',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x04' # 0x04 -> END OF TRANSMISSION
+ '\x05' # 0x05 -> ENQUIRY
+ '\x06' # 0x06 -> ACKNOWLEDGE
+ '\x07' # 0x07 -> BELL
+ '\x08' # 0x08 -> BACKSPACE
+ '\t' # 0x09 -> HORIZONTAL TABULATION
+ '\n' # 0x0A -> LINE FEED
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x14' # 0x14 -> DEVICE CONTROL FOUR
+ '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x16 -> SYNCHRONOUS IDLE
+ '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x1a' # 0x1A -> SUBSTITUTE
+ '\x1b' # 0x1B -> ESCAPE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> DELETE
+ '\u05d0' # 0x80 -> HEBREW LETTER ALEF
+ '\u05d1' # 0x81 -> HEBREW LETTER BET
+ '\u05d2' # 0x82 -> HEBREW LETTER GIMEL
+ '\u05d3' # 0x83 -> HEBREW LETTER DALET
+ '\u05d4' # 0x84 -> HEBREW LETTER HE
+ '\u05d5' # 0x85 -> HEBREW LETTER VAV
+ '\u05d6' # 0x86 -> HEBREW LETTER ZAYIN
+ '\u05d7' # 0x87 -> HEBREW LETTER HET
+ '\u05d8' # 0x88 -> HEBREW LETTER TET
+ '\u05d9' # 0x89 -> HEBREW LETTER YOD
+ '\u05da' # 0x8A -> HEBREW LETTER FINAL KAF
+ '\u05db' # 0x8B -> HEBREW LETTER KAF
+ '\u05dc' # 0x8C -> HEBREW LETTER LAMED
+ '\u05dd' # 0x8D -> HEBREW LETTER FINAL MEM
+ '\u05de' # 0x8E -> HEBREW LETTER MEM
+ '\u05df' # 0x8F -> HEBREW LETTER FINAL NUN
+ '\u05e0' # 0x90 -> HEBREW LETTER NUN
+ '\u05e1' # 0x91 -> HEBREW LETTER SAMEKH
+ '\u05e2' # 0x92 -> HEBREW LETTER AYIN
+ '\u05e3' # 0x93 -> HEBREW LETTER FINAL PE
+ '\u05e4' # 0x94 -> HEBREW LETTER PE
+ '\u05e5' # 0x95 -> HEBREW LETTER FINAL TSADI
+ '\u05e6' # 0x96 -> HEBREW LETTER TSADI
+ '\u05e7' # 0x97 -> HEBREW LETTER QOF
+ '\u05e8' # 0x98 -> HEBREW LETTER RESH
+ '\u05e9' # 0x99 -> HEBREW LETTER SHIN
+ '\u05ea' # 0x9A -> HEBREW LETTER TAV
+ '\ufffe' # 0x9B -> UNDEFINED
+ '\xa3' # 0x9C -> POUND SIGN
+ '\ufffe' # 0x9D -> UNDEFINED
+ '\xd7' # 0x9E -> MULTIPLICATION SIGN
+ '\ufffe' # 0x9F -> UNDEFINED
+ '\ufffe' # 0xA0 -> UNDEFINED
+ '\ufffe' # 0xA1 -> UNDEFINED
+ '\ufffe' # 0xA2 -> UNDEFINED
+ '\ufffe' # 0xA3 -> UNDEFINED
+ '\ufffe' # 0xA4 -> UNDEFINED
+ '\ufffe' # 0xA5 -> UNDEFINED
+ '\ufffe' # 0xA6 -> UNDEFINED
+ '\ufffe' # 0xA7 -> UNDEFINED
+ '\ufffe' # 0xA8 -> UNDEFINED
+ '\xae' # 0xA9 -> REGISTERED SIGN
+ '\xac' # 0xAA -> NOT SIGN
+ '\xbd' # 0xAB -> VULGAR FRACTION ONE HALF
+ '\xbc' # 0xAC -> VULGAR FRACTION ONE QUARTER
+ '\ufffe' # 0xAD -> UNDEFINED
+ '\xab' # 0xAE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0xAF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u2591' # 0xB0 -> LIGHT SHADE
+ '\u2592' # 0xB1 -> MEDIUM SHADE
+ '\u2593' # 0xB2 -> DARK SHADE
+ '\u2502' # 0xB3 -> BOX DRAWINGS LIGHT VERTICAL
+ '\u2524' # 0xB4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ '\ufffe' # 0xB5 -> UNDEFINED
+ '\ufffe' # 0xB6 -> UNDEFINED
+ '\ufffe' # 0xB7 -> UNDEFINED
+ '\xa9' # 0xB8 -> COPYRIGHT SIGN
+ '\u2563' # 0xB9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ '\u2551' # 0xBA -> BOX DRAWINGS DOUBLE VERTICAL
+ '\u2557' # 0xBB -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ '\u255d' # 0xBC -> BOX DRAWINGS DOUBLE UP AND LEFT
+ '\xa2' # 0xBD -> CENT SIGN
+ '\xa5' # 0xBE -> YEN SIGN
+ '\u2510' # 0xBF -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ '\u2514' # 0xC0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ '\u2534' # 0xC1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ '\u252c' # 0xC2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ '\u251c' # 0xC3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ '\u2500' # 0xC4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ '\u253c' # 0xC5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ '\ufffe' # 0xC6 -> UNDEFINED
+ '\ufffe' # 0xC7 -> UNDEFINED
+ '\u255a' # 0xC8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ '\u2554' # 0xC9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ '\u2569' # 0xCA -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ '\u2566' # 0xCB -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ '\u2560' # 0xCC -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ '\u2550' # 0xCD -> BOX DRAWINGS DOUBLE HORIZONTAL
+ '\u256c' # 0xCE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ '\xa4' # 0xCF -> CURRENCY SIGN
+ '\ufffe' # 0xD0 -> UNDEFINED
+ '\ufffe' # 0xD1 -> UNDEFINED
+ '\ufffe' # 0xD2 -> UNDEFINED
+ '\ufffe' # 0xD3 -> UNDEFINEDS
+ '\ufffe' # 0xD4 -> UNDEFINED
+ '\ufffe' # 0xD5 -> UNDEFINED
+ '\ufffe' # 0xD6 -> UNDEFINEDE
+ '\ufffe' # 0xD7 -> UNDEFINED
+ '\ufffe' # 0xD8 -> UNDEFINED
+ '\u2518' # 0xD9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ '\u250c' # 0xDA -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ '\u2588' # 0xDB -> FULL BLOCK
+ '\u2584' # 0xDC -> LOWER HALF BLOCK
+ '\xa6' # 0xDD -> BROKEN BAR
+ '\ufffe' # 0xDE -> UNDEFINED
+ '\u2580' # 0xDF -> UPPER HALF BLOCK
+ '\ufffe' # 0xE0 -> UNDEFINED
+ '\ufffe' # 0xE1 -> UNDEFINED
+ '\ufffe' # 0xE2 -> UNDEFINED
+ '\ufffe' # 0xE3 -> UNDEFINED
+ '\ufffe' # 0xE4 -> UNDEFINED
+ '\ufffe' # 0xE5 -> UNDEFINED
+ '\xb5' # 0xE6 -> MICRO SIGN
+ '\ufffe' # 0xE7 -> UNDEFINED
+ '\ufffe' # 0xE8 -> UNDEFINED
+ '\ufffe' # 0xE9 -> UNDEFINED
+ '\ufffe' # 0xEA -> UNDEFINED
+ '\ufffe' # 0xEB -> UNDEFINED
+ '\ufffe' # 0xEC -> UNDEFINED
+ '\ufffe' # 0xED -> UNDEFINED
+ '\xaf' # 0xEE -> MACRON
+ '\xb4' # 0xEF -> ACUTE ACCENT
+ '\xad' # 0xF0 -> SOFT HYPHEN
+ '\xb1' # 0xF1 -> PLUS-MINUS SIGN
+ '\u2017' # 0xF2 -> DOUBLE LOW LINE
+ '\xbe' # 0xF3 -> VULGAR FRACTION THREE QUARTERS
+ '\xb6' # 0xF4 -> PILCROW SIGN
+ '\xa7' # 0xF5 -> SECTION SIGN
+ '\xf7' # 0xF6 -> DIVISION SIGN
+ '\xb8' # 0xF7 -> CEDILLA
+ '\xb0' # 0xF8 -> DEGREE SIGN
+ '\xa8' # 0xF9 -> DIAERESIS
+ '\xb7' # 0xFA -> MIDDLE DOT
+ '\xb9' # 0xFB -> SUPERSCRIPT ONE
+ '\xb3' # 0xFC -> SUPERSCRIPT THREE
+ '\xb2' # 0xFD -> SUPERSCRIPT TWO
+ '\u25a0' # 0xFE -> BLACK SQUARE
+ '\xa0' # 0xFF -> NO-BREAK SPACE
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp857.py b/dist/lib/encodings/cp857.py
new file mode 100644
index 0000000..741b059
--- /dev/null
+++ b/dist/lib/encodings/cp857.py
@@ -0,0 +1,694 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP857.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp857',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
+ 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
+ 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x008d: 0x0131, # LATIN SMALL LETTER DOTLESS I
+ 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
+ 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
+ 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
+ 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
+ 0x0098: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE
+ 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x009e: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA
+ 0x009f: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA
+ 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
+ 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00a6: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE
+ 0x00a7: 0x011f, # LATIN SMALL LETTER G WITH BREVE
+ 0x00a8: 0x00bf, # INVERTED QUESTION MARK
+ 0x00a9: 0x00ae, # REGISTERED SIGN
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x00b8: 0x00a9, # COPYRIGHT SIGN
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x00a2, # CENT SIGN
+ 0x00be: 0x00a5, # YEN SIGN
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
+ 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x00a4, # CURRENCY SIGN
+ 0x00d0: 0x00ba, # MASCULINE ORDINAL INDICATOR
+ 0x00d1: 0x00aa, # FEMININE ORDINAL INDICATOR
+ 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x00d5: None, # UNDEFINED
+ 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x00a6, # BROKEN BAR
+ 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
+ 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
+ 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
+ 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: None, # UNDEFINED
+ 0x00e8: 0x00d7, # MULTIPLICATION SIGN
+ 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x00ed: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x00ee: 0x00af, # MACRON
+ 0x00ef: 0x00b4, # ACUTE ACCENT
+ 0x00f0: 0x00ad, # SOFT HYPHEN
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: None, # UNDEFINED
+ 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS
+ 0x00f4: 0x00b6, # PILCROW SIGN
+ 0x00f5: 0x00a7, # SECTION SIGN
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x00b8, # CEDILLA
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x00a8, # DIAERESIS
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x00b9, # SUPERSCRIPT ONE
+ 0x00fc: 0x00b3, # SUPERSCRIPT THREE
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x0000 -> NULL
+ '\x01' # 0x0001 -> START OF HEADING
+ '\x02' # 0x0002 -> START OF TEXT
+ '\x03' # 0x0003 -> END OF TEXT
+ '\x04' # 0x0004 -> END OF TRANSMISSION
+ '\x05' # 0x0005 -> ENQUIRY
+ '\x06' # 0x0006 -> ACKNOWLEDGE
+ '\x07' # 0x0007 -> BELL
+ '\x08' # 0x0008 -> BACKSPACE
+ '\t' # 0x0009 -> HORIZONTAL TABULATION
+ '\n' # 0x000a -> LINE FEED
+ '\x0b' # 0x000b -> VERTICAL TABULATION
+ '\x0c' # 0x000c -> FORM FEED
+ '\r' # 0x000d -> CARRIAGE RETURN
+ '\x0e' # 0x000e -> SHIFT OUT
+ '\x0f' # 0x000f -> SHIFT IN
+ '\x10' # 0x0010 -> DATA LINK ESCAPE
+ '\x11' # 0x0011 -> DEVICE CONTROL ONE
+ '\x12' # 0x0012 -> DEVICE CONTROL TWO
+ '\x13' # 0x0013 -> DEVICE CONTROL THREE
+ '\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ '\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ '\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x0018 -> CANCEL
+ '\x19' # 0x0019 -> END OF MEDIUM
+ '\x1a' # 0x001a -> SUBSTITUTE
+ '\x1b' # 0x001b -> ESCAPE
+ '\x1c' # 0x001c -> FILE SEPARATOR
+ '\x1d' # 0x001d -> GROUP SEPARATOR
+ '\x1e' # 0x001e -> RECORD SEPARATOR
+ '\x1f' # 0x001f -> UNIT SEPARATOR
+ ' ' # 0x0020 -> SPACE
+ '!' # 0x0021 -> EXCLAMATION MARK
+ '"' # 0x0022 -> QUOTATION MARK
+ '#' # 0x0023 -> NUMBER SIGN
+ '$' # 0x0024 -> DOLLAR SIGN
+ '%' # 0x0025 -> PERCENT SIGN
+ '&' # 0x0026 -> AMPERSAND
+ "'" # 0x0027 -> APOSTROPHE
+ '(' # 0x0028 -> LEFT PARENTHESIS
+ ')' # 0x0029 -> RIGHT PARENTHESIS
+ '*' # 0x002a -> ASTERISK
+ '+' # 0x002b -> PLUS SIGN
+ ',' # 0x002c -> COMMA
+ '-' # 0x002d -> HYPHEN-MINUS
+ '.' # 0x002e -> FULL STOP
+ '/' # 0x002f -> SOLIDUS
+ '0' # 0x0030 -> DIGIT ZERO
+ '1' # 0x0031 -> DIGIT ONE
+ '2' # 0x0032 -> DIGIT TWO
+ '3' # 0x0033 -> DIGIT THREE
+ '4' # 0x0034 -> DIGIT FOUR
+ '5' # 0x0035 -> DIGIT FIVE
+ '6' # 0x0036 -> DIGIT SIX
+ '7' # 0x0037 -> DIGIT SEVEN
+ '8' # 0x0038 -> DIGIT EIGHT
+ '9' # 0x0039 -> DIGIT NINE
+ ':' # 0x003a -> COLON
+ ';' # 0x003b -> SEMICOLON
+ '<' # 0x003c -> LESS-THAN SIGN
+ '=' # 0x003d -> EQUALS SIGN
+ '>' # 0x003e -> GREATER-THAN SIGN
+ '?' # 0x003f -> QUESTION MARK
+ '@' # 0x0040 -> COMMERCIAL AT
+ 'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ 'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ 'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ 'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ 'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ 'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ 'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ 'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ 'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ 'J' # 0x004a -> LATIN CAPITAL LETTER J
+ 'K' # 0x004b -> LATIN CAPITAL LETTER K
+ 'L' # 0x004c -> LATIN CAPITAL LETTER L
+ 'M' # 0x004d -> LATIN CAPITAL LETTER M
+ 'N' # 0x004e -> LATIN CAPITAL LETTER N
+ 'O' # 0x004f -> LATIN CAPITAL LETTER O
+ 'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ 'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ 'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ 'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ 'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ 'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ 'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ '[' # 0x005b -> LEFT SQUARE BRACKET
+ '\\' # 0x005c -> REVERSE SOLIDUS
+ ']' # 0x005d -> RIGHT SQUARE BRACKET
+ '^' # 0x005e -> CIRCUMFLEX ACCENT
+ '_' # 0x005f -> LOW LINE
+ '`' # 0x0060 -> GRAVE ACCENT
+ 'a' # 0x0061 -> LATIN SMALL LETTER A
+ 'b' # 0x0062 -> LATIN SMALL LETTER B
+ 'c' # 0x0063 -> LATIN SMALL LETTER C
+ 'd' # 0x0064 -> LATIN SMALL LETTER D
+ 'e' # 0x0065 -> LATIN SMALL LETTER E
+ 'f' # 0x0066 -> LATIN SMALL LETTER F
+ 'g' # 0x0067 -> LATIN SMALL LETTER G
+ 'h' # 0x0068 -> LATIN SMALL LETTER H
+ 'i' # 0x0069 -> LATIN SMALL LETTER I
+ 'j' # 0x006a -> LATIN SMALL LETTER J
+ 'k' # 0x006b -> LATIN SMALL LETTER K
+ 'l' # 0x006c -> LATIN SMALL LETTER L
+ 'm' # 0x006d -> LATIN SMALL LETTER M
+ 'n' # 0x006e -> LATIN SMALL LETTER N
+ 'o' # 0x006f -> LATIN SMALL LETTER O
+ 'p' # 0x0070 -> LATIN SMALL LETTER P
+ 'q' # 0x0071 -> LATIN SMALL LETTER Q
+ 'r' # 0x0072 -> LATIN SMALL LETTER R
+ 's' # 0x0073 -> LATIN SMALL LETTER S
+ 't' # 0x0074 -> LATIN SMALL LETTER T
+ 'u' # 0x0075 -> LATIN SMALL LETTER U
+ 'v' # 0x0076 -> LATIN SMALL LETTER V
+ 'w' # 0x0077 -> LATIN SMALL LETTER W
+ 'x' # 0x0078 -> LATIN SMALL LETTER X
+ 'y' # 0x0079 -> LATIN SMALL LETTER Y
+ 'z' # 0x007a -> LATIN SMALL LETTER Z
+ '{' # 0x007b -> LEFT CURLY BRACKET
+ '|' # 0x007c -> VERTICAL LINE
+ '}' # 0x007d -> RIGHT CURLY BRACKET
+ '~' # 0x007e -> TILDE
+ '\x7f' # 0x007f -> DELETE
+ '\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
+ '\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\u0131' # 0x008d -> LATIN SMALL LETTER DOTLESS I
+ '\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
+ '\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
+ '\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE
+ '\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+ '\u0130' # 0x0098 -> LATIN CAPITAL LETTER I WITH DOT ABOVE
+ '\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE
+ '\xa3' # 0x009c -> POUND SIGN
+ '\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE
+ '\u015e' # 0x009e -> LATIN CAPITAL LETTER S WITH CEDILLA
+ '\u015f' # 0x009f -> LATIN SMALL LETTER S WITH CEDILLA
+ '\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+ '\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ '\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ '\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+ '\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+ '\u011e' # 0x00a6 -> LATIN CAPITAL LETTER G WITH BREVE
+ '\u011f' # 0x00a7 -> LATIN SMALL LETTER G WITH BREVE
+ '\xbf' # 0x00a8 -> INVERTED QUESTION MARK
+ '\xae' # 0x00a9 -> REGISTERED SIGN
+ '\xac' # 0x00aa -> NOT SIGN
+ '\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ '\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ '\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
+ '\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u2591' # 0x00b0 -> LIGHT SHADE
+ '\u2592' # 0x00b1 -> MEDIUM SHADE
+ '\u2593' # 0x00b2 -> DARK SHADE
+ '\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ '\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ '\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ '\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE
+ '\xa9' # 0x00b8 -> COPYRIGHT SIGN
+ '\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ '\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ '\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ '\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ '\xa2' # 0x00bd -> CENT SIGN
+ '\xa5' # 0x00be -> YEN SIGN
+ '\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ '\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ '\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ '\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ '\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ '\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ '\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ '\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE
+ '\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE
+ '\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ '\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ '\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ '\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ '\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ '\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ '\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ '\xa4' # 0x00cf -> CURRENCY SIGN
+ '\xba' # 0x00d0 -> MASCULINE ORDINAL INDICATOR
+ '\xaa' # 0x00d1 -> FEMININE ORDINAL INDICATOR
+ '\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ '\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ '\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE
+ '\ufffe' # 0x00d5 -> UNDEFINED
+ '\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ '\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ '\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ '\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ '\u2588' # 0x00db -> FULL BLOCK
+ '\u2584' # 0x00dc -> LOWER HALF BLOCK
+ '\xa6' # 0x00dd -> BROKEN BAR
+ '\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE
+ '\u2580' # 0x00df -> UPPER HALF BLOCK
+ '\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
+ '\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE
+ '\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE
+ '\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE
+ '\xb5' # 0x00e6 -> MICRO SIGN
+ '\ufffe' # 0x00e7 -> UNDEFINED
+ '\xd7' # 0x00e8 -> MULTIPLICATION SIGN
+ '\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ '\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE
+ '\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE
+ '\xff' # 0x00ed -> LATIN SMALL LETTER Y WITH DIAERESIS
+ '\xaf' # 0x00ee -> MACRON
+ '\xb4' # 0x00ef -> ACUTE ACCENT
+ '\xad' # 0x00f0 -> SOFT HYPHEN
+ '\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ '\ufffe' # 0x00f2 -> UNDEFINED
+ '\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS
+ '\xb6' # 0x00f4 -> PILCROW SIGN
+ '\xa7' # 0x00f5 -> SECTION SIGN
+ '\xf7' # 0x00f6 -> DIVISION SIGN
+ '\xb8' # 0x00f7 -> CEDILLA
+ '\xb0' # 0x00f8 -> DEGREE SIGN
+ '\xa8' # 0x00f9 -> DIAERESIS
+ '\xb7' # 0x00fa -> MIDDLE DOT
+ '\xb9' # 0x00fb -> SUPERSCRIPT ONE
+ '\xb3' # 0x00fc -> SUPERSCRIPT THREE
+ '\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ '\u25a0' # 0x00fe -> BLACK SQUARE
+ '\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
+ 0x00a2: 0x00bd, # CENT SIGN
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00a4: 0x00cf, # CURRENCY SIGN
+ 0x00a5: 0x00be, # YEN SIGN
+ 0x00a6: 0x00dd, # BROKEN BAR
+ 0x00a7: 0x00f5, # SECTION SIGN
+ 0x00a8: 0x00f9, # DIAERESIS
+ 0x00a9: 0x00b8, # COPYRIGHT SIGN
+ 0x00aa: 0x00d1, # FEMININE ORDINAL INDICATOR
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00ad: 0x00f0, # SOFT HYPHEN
+ 0x00ae: 0x00a9, # REGISTERED SIGN
+ 0x00af: 0x00ee, # MACRON
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b3: 0x00fc, # SUPERSCRIPT THREE
+ 0x00b4: 0x00ef, # ACUTE ACCENT
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b6: 0x00f4, # PILCROW SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00b8: 0x00f7, # CEDILLA
+ 0x00b9: 0x00fb, # SUPERSCRIPT ONE
+ 0x00ba: 0x00d0, # MASCULINE ORDINAL INDICATOR
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS
+ 0x00bf: 0x00a8, # INVERTED QUESTION MARK
+ 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE
+ 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
+ 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE
+ 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS
+ 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE
+ 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x00d7: 0x00e8, # MULTIPLICATION SIGN
+ 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
+ 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
+ 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE
+ 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
+ 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
+ 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
+ 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE
+ 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE
+ 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x00ff: 0x00ed, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x011e: 0x00a6, # LATIN CAPITAL LETTER G WITH BREVE
+ 0x011f: 0x00a7, # LATIN SMALL LETTER G WITH BREVE
+ 0x0130: 0x0098, # LATIN CAPITAL LETTER I WITH DOT ABOVE
+ 0x0131: 0x008d, # LATIN SMALL LETTER DOTLESS I
+ 0x015e: 0x009e, # LATIN CAPITAL LETTER S WITH CEDILLA
+ 0x015f: 0x009f, # LATIN SMALL LETTER S WITH CEDILLA
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/dist/lib/encodings/cp858.py b/dist/lib/encodings/cp858.py
new file mode 100644
index 0000000..7579f52
--- /dev/null
+++ b/dist/lib/encodings/cp858.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec for CP858, modified from cp850.
+
+"""
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp858',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
+ 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
+ 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
+ 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
+ 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
+ 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
+ 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
+ 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x009e: 0x00d7, # MULTIPLICATION SIGN
+ 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
+ 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
+ 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
+ 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
+ 0x00a8: 0x00bf, # INVERTED QUESTION MARK
+ 0x00a9: 0x00ae, # REGISTERED SIGN
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x00b8: 0x00a9, # COPYRIGHT SIGN
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x00a2, # CENT SIGN
+ 0x00be: 0x00a5, # YEN SIGN
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
+ 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x00a4, # CURRENCY SIGN
+ 0x00d0: 0x00f0, # LATIN SMALL LETTER ETH
+ 0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH
+ 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x00d5: 0x20ac, # EURO SIGN
+ 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x00a6, # BROKEN BAR
+ 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
+ 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
+ 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
+ 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: 0x00fe, # LATIN SMALL LETTER THORN
+ 0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN
+ 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
+ 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
+ 0x00ee: 0x00af, # MACRON
+ 0x00ef: 0x00b4, # ACUTE ACCENT
+ 0x00f0: 0x00ad, # SOFT HYPHEN
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x2017, # DOUBLE LOW LINE
+ 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS
+ 0x00f4: 0x00b6, # PILCROW SIGN
+ 0x00f5: 0x00a7, # SECTION SIGN
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x00b8, # CEDILLA
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x00a8, # DIAERESIS
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x00b9, # SUPERSCRIPT ONE
+ 0x00fc: 0x00b3, # SUPERSCRIPT THREE
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x0000 -> NULL
+ '\x01' # 0x0001 -> START OF HEADING
+ '\x02' # 0x0002 -> START OF TEXT
+ '\x03' # 0x0003 -> END OF TEXT
+ '\x04' # 0x0004 -> END OF TRANSMISSION
+ '\x05' # 0x0005 -> ENQUIRY
+ '\x06' # 0x0006 -> ACKNOWLEDGE
+ '\x07' # 0x0007 -> BELL
+ '\x08' # 0x0008 -> BACKSPACE
+ '\t' # 0x0009 -> HORIZONTAL TABULATION
+ '\n' # 0x000a -> LINE FEED
+ '\x0b' # 0x000b -> VERTICAL TABULATION
+ '\x0c' # 0x000c -> FORM FEED
+ '\r' # 0x000d -> CARRIAGE RETURN
+ '\x0e' # 0x000e -> SHIFT OUT
+ '\x0f' # 0x000f -> SHIFT IN
+ '\x10' # 0x0010 -> DATA LINK ESCAPE
+ '\x11' # 0x0011 -> DEVICE CONTROL ONE
+ '\x12' # 0x0012 -> DEVICE CONTROL TWO
+ '\x13' # 0x0013 -> DEVICE CONTROL THREE
+ '\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ '\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ '\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x0018 -> CANCEL
+ '\x19' # 0x0019 -> END OF MEDIUM
+ '\x1a' # 0x001a -> SUBSTITUTE
+ '\x1b' # 0x001b -> ESCAPE
+ '\x1c' # 0x001c -> FILE SEPARATOR
+ '\x1d' # 0x001d -> GROUP SEPARATOR
+ '\x1e' # 0x001e -> RECORD SEPARATOR
+ '\x1f' # 0x001f -> UNIT SEPARATOR
+ ' ' # 0x0020 -> SPACE
+ '!' # 0x0021 -> EXCLAMATION MARK
+ '"' # 0x0022 -> QUOTATION MARK
+ '#' # 0x0023 -> NUMBER SIGN
+ '$' # 0x0024 -> DOLLAR SIGN
+ '%' # 0x0025 -> PERCENT SIGN
+ '&' # 0x0026 -> AMPERSAND
+ "'" # 0x0027 -> APOSTROPHE
+ '(' # 0x0028 -> LEFT PARENTHESIS
+ ')' # 0x0029 -> RIGHT PARENTHESIS
+ '*' # 0x002a -> ASTERISK
+ '+' # 0x002b -> PLUS SIGN
+ ',' # 0x002c -> COMMA
+ '-' # 0x002d -> HYPHEN-MINUS
+ '.' # 0x002e -> FULL STOP
+ '/' # 0x002f -> SOLIDUS
+ '0' # 0x0030 -> DIGIT ZERO
+ '1' # 0x0031 -> DIGIT ONE
+ '2' # 0x0032 -> DIGIT TWO
+ '3' # 0x0033 -> DIGIT THREE
+ '4' # 0x0034 -> DIGIT FOUR
+ '5' # 0x0035 -> DIGIT FIVE
+ '6' # 0x0036 -> DIGIT SIX
+ '7' # 0x0037 -> DIGIT SEVEN
+ '8' # 0x0038 -> DIGIT EIGHT
+ '9' # 0x0039 -> DIGIT NINE
+ ':' # 0x003a -> COLON
+ ';' # 0x003b -> SEMICOLON
+ '<' # 0x003c -> LESS-THAN SIGN
+ '=' # 0x003d -> EQUALS SIGN
+ '>' # 0x003e -> GREATER-THAN SIGN
+ '?' # 0x003f -> QUESTION MARK
+ '@' # 0x0040 -> COMMERCIAL AT
+ 'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ 'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ 'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ 'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ 'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ 'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ 'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ 'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ 'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ 'J' # 0x004a -> LATIN CAPITAL LETTER J
+ 'K' # 0x004b -> LATIN CAPITAL LETTER K
+ 'L' # 0x004c -> LATIN CAPITAL LETTER L
+ 'M' # 0x004d -> LATIN CAPITAL LETTER M
+ 'N' # 0x004e -> LATIN CAPITAL LETTER N
+ 'O' # 0x004f -> LATIN CAPITAL LETTER O
+ 'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ 'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ 'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ 'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ 'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ 'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ 'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ '[' # 0x005b -> LEFT SQUARE BRACKET
+ '\\' # 0x005c -> REVERSE SOLIDUS
+ ']' # 0x005d -> RIGHT SQUARE BRACKET
+ '^' # 0x005e -> CIRCUMFLEX ACCENT
+ '_' # 0x005f -> LOW LINE
+ '`' # 0x0060 -> GRAVE ACCENT
+ 'a' # 0x0061 -> LATIN SMALL LETTER A
+ 'b' # 0x0062 -> LATIN SMALL LETTER B
+ 'c' # 0x0063 -> LATIN SMALL LETTER C
+ 'd' # 0x0064 -> LATIN SMALL LETTER D
+ 'e' # 0x0065 -> LATIN SMALL LETTER E
+ 'f' # 0x0066 -> LATIN SMALL LETTER F
+ 'g' # 0x0067 -> LATIN SMALL LETTER G
+ 'h' # 0x0068 -> LATIN SMALL LETTER H
+ 'i' # 0x0069 -> LATIN SMALL LETTER I
+ 'j' # 0x006a -> LATIN SMALL LETTER J
+ 'k' # 0x006b -> LATIN SMALL LETTER K
+ 'l' # 0x006c -> LATIN SMALL LETTER L
+ 'm' # 0x006d -> LATIN SMALL LETTER M
+ 'n' # 0x006e -> LATIN SMALL LETTER N
+ 'o' # 0x006f -> LATIN SMALL LETTER O
+ 'p' # 0x0070 -> LATIN SMALL LETTER P
+ 'q' # 0x0071 -> LATIN SMALL LETTER Q
+ 'r' # 0x0072 -> LATIN SMALL LETTER R
+ 's' # 0x0073 -> LATIN SMALL LETTER S
+ 't' # 0x0074 -> LATIN SMALL LETTER T
+ 'u' # 0x0075 -> LATIN SMALL LETTER U
+ 'v' # 0x0076 -> LATIN SMALL LETTER V
+ 'w' # 0x0077 -> LATIN SMALL LETTER W
+ 'x' # 0x0078 -> LATIN SMALL LETTER X
+ 'y' # 0x0079 -> LATIN SMALL LETTER Y
+ 'z' # 0x007a -> LATIN SMALL LETTER Z
+ '{' # 0x007b -> LEFT CURLY BRACKET
+ '|' # 0x007c -> VERTICAL LINE
+ '}' # 0x007d -> RIGHT CURLY BRACKET
+ '~' # 0x007e -> TILDE
+ '\x7f' # 0x007f -> DELETE
+ '\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
+ '\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE
+ '\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
+ '\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
+ '\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE
+ '\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+ '\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS
+ '\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE
+ '\xa3' # 0x009c -> POUND SIGN
+ '\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE
+ '\xd7' # 0x009e -> MULTIPLICATION SIGN
+ '\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
+ '\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+ '\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ '\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ '\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+ '\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+ '\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR
+ '\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR
+ '\xbf' # 0x00a8 -> INVERTED QUESTION MARK
+ '\xae' # 0x00a9 -> REGISTERED SIGN
+ '\xac' # 0x00aa -> NOT SIGN
+ '\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ '\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ '\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
+ '\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u2591' # 0x00b0 -> LIGHT SHADE
+ '\u2592' # 0x00b1 -> MEDIUM SHADE
+ '\u2593' # 0x00b2 -> DARK SHADE
+ '\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ '\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ '\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ '\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE
+ '\xa9' # 0x00b8 -> COPYRIGHT SIGN
+ '\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ '\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ '\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ '\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ '\xa2' # 0x00bd -> CENT SIGN
+ '\xa5' # 0x00be -> YEN SIGN
+ '\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ '\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ '\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ '\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ '\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ '\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ '\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ '\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE
+ '\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE
+ '\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ '\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ '\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ '\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ '\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ '\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ '\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ '\xa4' # 0x00cf -> CURRENCY SIGN
+ '\xf0' # 0x00d0 -> LATIN SMALL LETTER ETH
+ '\xd0' # 0x00d1 -> LATIN CAPITAL LETTER ETH
+ '\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ '\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ '\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE
+ '\u20ac' # 0x00d5 -> EURO SIGN
+ '\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ '\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ '\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ '\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ '\u2588' # 0x00db -> FULL BLOCK
+ '\u2584' # 0x00dc -> LOWER HALF BLOCK
+ '\xa6' # 0x00dd -> BROKEN BAR
+ '\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE
+ '\u2580' # 0x00df -> UPPER HALF BLOCK
+ '\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
+ '\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE
+ '\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE
+ '\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE
+ '\xb5' # 0x00e6 -> MICRO SIGN
+ '\xfe' # 0x00e7 -> LATIN SMALL LETTER THORN
+ '\xde' # 0x00e8 -> LATIN CAPITAL LETTER THORN
+ '\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ '\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE
+ '\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE
+ '\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE
+ '\xaf' # 0x00ee -> MACRON
+ '\xb4' # 0x00ef -> ACUTE ACCENT
+ '\xad' # 0x00f0 -> SOFT HYPHEN
+ '\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ '\u2017' # 0x00f2 -> DOUBLE LOW LINE
+ '\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS
+ '\xb6' # 0x00f4 -> PILCROW SIGN
+ '\xa7' # 0x00f5 -> SECTION SIGN
+ '\xf7' # 0x00f6 -> DIVISION SIGN
+ '\xb8' # 0x00f7 -> CEDILLA
+ '\xb0' # 0x00f8 -> DEGREE SIGN
+ '\xa8' # 0x00f9 -> DIAERESIS
+ '\xb7' # 0x00fa -> MIDDLE DOT
+ '\xb9' # 0x00fb -> SUPERSCRIPT ONE
+ '\xb3' # 0x00fc -> SUPERSCRIPT THREE
+ '\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ '\u25a0' # 0x00fe -> BLACK SQUARE
+ '\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
+ 0x00a2: 0x00bd, # CENT SIGN
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00a4: 0x00cf, # CURRENCY SIGN
+ 0x00a5: 0x00be, # YEN SIGN
+ 0x00a6: 0x00dd, # BROKEN BAR
+ 0x00a7: 0x00f5, # SECTION SIGN
+ 0x00a8: 0x00f9, # DIAERESIS
+ 0x00a9: 0x00b8, # COPYRIGHT SIGN
+ 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00ad: 0x00f0, # SOFT HYPHEN
+ 0x00ae: 0x00a9, # REGISTERED SIGN
+ 0x00af: 0x00ee, # MACRON
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b3: 0x00fc, # SUPERSCRIPT THREE
+ 0x00b4: 0x00ef, # ACUTE ACCENT
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b6: 0x00f4, # PILCROW SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00b8: 0x00f7, # CEDILLA
+ 0x00b9: 0x00fb, # SUPERSCRIPT ONE
+ 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS
+ 0x00bf: 0x00a8, # INVERTED QUESTION MARK
+ 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE
+ 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
+ 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE
+ 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS
+ 0x00d0: 0x00d1, # LATIN CAPITAL LETTER ETH
+ 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE
+ 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x00d7: 0x009e, # MULTIPLICATION SIGN
+ 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE
+ 0x00de: 0x00e8, # LATIN CAPITAL LETTER THORN
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
+ 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
+ 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE
+ 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
+ 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE
+ 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x00f0: 0x00d0, # LATIN SMALL LETTER ETH
+ 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
+ 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE
+ 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE
+ 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE
+ 0x00fe: 0x00e7, # LATIN SMALL LETTER THORN
+ 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x20ac: 0x00d5, # EURO SIGN
+ 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
+ 0x2017: 0x00f2, # DOUBLE LOW LINE
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/dist/lib/encodings/cp860.py b/dist/lib/encodings/cp860.py
new file mode 100644
index 0000000..65903e7
--- /dev/null
+++ b/dist/lib/encodings/cp860.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP860.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp860',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x0084: 0x00e3, # LATIN SMALL LETTER A WITH TILDE
+ 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
+ 0x0086: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x0089: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
+ 0x008b: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x008c: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
+ 0x008e: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE
+ 0x008f: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x0092: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x0094: 0x00f5, # LATIN SMALL LETTER O WITH TILDE
+ 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
+ 0x0096: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
+ 0x0098: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE
+ 0x0099: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x00a2, # CENT SIGN
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x009e: 0x20a7, # PESETA SIGN
+ 0x009f: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
+ 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
+ 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
+ 0x00a8: 0x00bf, # INVERTED QUESTION MARK
+ 0x00a9: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x258c, # LEFT HALF BLOCK
+ 0x00de: 0x2590, # RIGHT HALF BLOCK
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
+ 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
+ 0x00e3: 0x03c0, # GREEK SMALL LETTER PI
+ 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
+ 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
+ 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
+ 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
+ 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
+ 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
+ 0x00ec: 0x221e, # INFINITY
+ 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
+ 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
+ 0x00ef: 0x2229, # INTERSECTION
+ 0x00f0: 0x2261, # IDENTICAL TO
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
+ 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
+ 0x00f4: 0x2320, # TOP HALF INTEGRAL
+ 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x2248, # ALMOST EQUAL TO
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x2219, # BULLET OPERATOR
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x221a, # SQUARE ROOT
+ 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x0000 -> NULL
+ '\x01' # 0x0001 -> START OF HEADING
+ '\x02' # 0x0002 -> START OF TEXT
+ '\x03' # 0x0003 -> END OF TEXT
+ '\x04' # 0x0004 -> END OF TRANSMISSION
+ '\x05' # 0x0005 -> ENQUIRY
+ '\x06' # 0x0006 -> ACKNOWLEDGE
+ '\x07' # 0x0007 -> BELL
+ '\x08' # 0x0008 -> BACKSPACE
+ '\t' # 0x0009 -> HORIZONTAL TABULATION
+ '\n' # 0x000a -> LINE FEED
+ '\x0b' # 0x000b -> VERTICAL TABULATION
+ '\x0c' # 0x000c -> FORM FEED
+ '\r' # 0x000d -> CARRIAGE RETURN
+ '\x0e' # 0x000e -> SHIFT OUT
+ '\x0f' # 0x000f -> SHIFT IN
+ '\x10' # 0x0010 -> DATA LINK ESCAPE
+ '\x11' # 0x0011 -> DEVICE CONTROL ONE
+ '\x12' # 0x0012 -> DEVICE CONTROL TWO
+ '\x13' # 0x0013 -> DEVICE CONTROL THREE
+ '\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ '\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ '\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x0018 -> CANCEL
+ '\x19' # 0x0019 -> END OF MEDIUM
+ '\x1a' # 0x001a -> SUBSTITUTE
+ '\x1b' # 0x001b -> ESCAPE
+ '\x1c' # 0x001c -> FILE SEPARATOR
+ '\x1d' # 0x001d -> GROUP SEPARATOR
+ '\x1e' # 0x001e -> RECORD SEPARATOR
+ '\x1f' # 0x001f -> UNIT SEPARATOR
+ ' ' # 0x0020 -> SPACE
+ '!' # 0x0021 -> EXCLAMATION MARK
+ '"' # 0x0022 -> QUOTATION MARK
+ '#' # 0x0023 -> NUMBER SIGN
+ '$' # 0x0024 -> DOLLAR SIGN
+ '%' # 0x0025 -> PERCENT SIGN
+ '&' # 0x0026 -> AMPERSAND
+ "'" # 0x0027 -> APOSTROPHE
+ '(' # 0x0028 -> LEFT PARENTHESIS
+ ')' # 0x0029 -> RIGHT PARENTHESIS
+ '*' # 0x002a -> ASTERISK
+ '+' # 0x002b -> PLUS SIGN
+ ',' # 0x002c -> COMMA
+ '-' # 0x002d -> HYPHEN-MINUS
+ '.' # 0x002e -> FULL STOP
+ '/' # 0x002f -> SOLIDUS
+ '0' # 0x0030 -> DIGIT ZERO
+ '1' # 0x0031 -> DIGIT ONE
+ '2' # 0x0032 -> DIGIT TWO
+ '3' # 0x0033 -> DIGIT THREE
+ '4' # 0x0034 -> DIGIT FOUR
+ '5' # 0x0035 -> DIGIT FIVE
+ '6' # 0x0036 -> DIGIT SIX
+ '7' # 0x0037 -> DIGIT SEVEN
+ '8' # 0x0038 -> DIGIT EIGHT
+ '9' # 0x0039 -> DIGIT NINE
+ ':' # 0x003a -> COLON
+ ';' # 0x003b -> SEMICOLON
+ '<' # 0x003c -> LESS-THAN SIGN
+ '=' # 0x003d -> EQUALS SIGN
+ '>' # 0x003e -> GREATER-THAN SIGN
+ '?' # 0x003f -> QUESTION MARK
+ '@' # 0x0040 -> COMMERCIAL AT
+ 'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ 'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ 'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ 'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ 'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ 'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ 'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ 'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ 'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ 'J' # 0x004a -> LATIN CAPITAL LETTER J
+ 'K' # 0x004b -> LATIN CAPITAL LETTER K
+ 'L' # 0x004c -> LATIN CAPITAL LETTER L
+ 'M' # 0x004d -> LATIN CAPITAL LETTER M
+ 'N' # 0x004e -> LATIN CAPITAL LETTER N
+ 'O' # 0x004f -> LATIN CAPITAL LETTER O
+ 'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ 'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ 'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ 'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ 'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ 'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ 'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ '[' # 0x005b -> LEFT SQUARE BRACKET
+ '\\' # 0x005c -> REVERSE SOLIDUS
+ ']' # 0x005d -> RIGHT SQUARE BRACKET
+ '^' # 0x005e -> CIRCUMFLEX ACCENT
+ '_' # 0x005f -> LOW LINE
+ '`' # 0x0060 -> GRAVE ACCENT
+ 'a' # 0x0061 -> LATIN SMALL LETTER A
+ 'b' # 0x0062 -> LATIN SMALL LETTER B
+ 'c' # 0x0063 -> LATIN SMALL LETTER C
+ 'd' # 0x0064 -> LATIN SMALL LETTER D
+ 'e' # 0x0065 -> LATIN SMALL LETTER E
+ 'f' # 0x0066 -> LATIN SMALL LETTER F
+ 'g' # 0x0067 -> LATIN SMALL LETTER G
+ 'h' # 0x0068 -> LATIN SMALL LETTER H
+ 'i' # 0x0069 -> LATIN SMALL LETTER I
+ 'j' # 0x006a -> LATIN SMALL LETTER J
+ 'k' # 0x006b -> LATIN SMALL LETTER K
+ 'l' # 0x006c -> LATIN SMALL LETTER L
+ 'm' # 0x006d -> LATIN SMALL LETTER M
+ 'n' # 0x006e -> LATIN SMALL LETTER N
+ 'o' # 0x006f -> LATIN SMALL LETTER O
+ 'p' # 0x0070 -> LATIN SMALL LETTER P
+ 'q' # 0x0071 -> LATIN SMALL LETTER Q
+ 'r' # 0x0072 -> LATIN SMALL LETTER R
+ 's' # 0x0073 -> LATIN SMALL LETTER S
+ 't' # 0x0074 -> LATIN SMALL LETTER T
+ 'u' # 0x0075 -> LATIN SMALL LETTER U
+ 'v' # 0x0076 -> LATIN SMALL LETTER V
+ 'w' # 0x0077 -> LATIN SMALL LETTER W
+ 'x' # 0x0078 -> LATIN SMALL LETTER X
+ 'y' # 0x0079 -> LATIN SMALL LETTER Y
+ 'z' # 0x007a -> LATIN SMALL LETTER Z
+ '{' # 0x007b -> LEFT CURLY BRACKET
+ '|' # 0x007c -> VERTICAL LINE
+ '}' # 0x007d -> RIGHT CURLY BRACKET
+ '~' # 0x007e -> TILDE
+ '\x7f' # 0x007f -> DELETE
+ '\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\xe3' # 0x0084 -> LATIN SMALL LETTER A WITH TILDE
+ '\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xc1' # 0x0086 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xca' # 0x0089 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ '\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
+ '\xcd' # 0x008b -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\xd4' # 0x008c -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE
+ '\xc3' # 0x008e -> LATIN CAPITAL LETTER A WITH TILDE
+ '\xc2' # 0x008f -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ '\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xc0' # 0x0091 -> LATIN CAPITAL LETTER A WITH GRAVE
+ '\xc8' # 0x0092 -> LATIN CAPITAL LETTER E WITH GRAVE
+ '\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xf5' # 0x0094 -> LATIN SMALL LETTER O WITH TILDE
+ '\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE
+ '\xda' # 0x0096 -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+ '\xcc' # 0x0098 -> LATIN CAPITAL LETTER I WITH GRAVE
+ '\xd5' # 0x0099 -> LATIN CAPITAL LETTER O WITH TILDE
+ '\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xa2' # 0x009b -> CENT SIGN
+ '\xa3' # 0x009c -> POUND SIGN
+ '\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE
+ '\u20a7' # 0x009e -> PESETA SIGN
+ '\xd3' # 0x009f -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+ '\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ '\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ '\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+ '\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+ '\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR
+ '\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR
+ '\xbf' # 0x00a8 -> INVERTED QUESTION MARK
+ '\xd2' # 0x00a9 -> LATIN CAPITAL LETTER O WITH GRAVE
+ '\xac' # 0x00aa -> NOT SIGN
+ '\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ '\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ '\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
+ '\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u2591' # 0x00b0 -> LIGHT SHADE
+ '\u2592' # 0x00b1 -> MEDIUM SHADE
+ '\u2593' # 0x00b2 -> DARK SHADE
+ '\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ '\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ '\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ '\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ '\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ '\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ '\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ '\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ '\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ '\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ '\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ '\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ '\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ '\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ '\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ '\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ '\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ '\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ '\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ '\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ '\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ '\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ '\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ '\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ '\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ '\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ '\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ '\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ '\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ '\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ '\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ '\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ '\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ '\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ '\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ '\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ '\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ '\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ '\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ '\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ '\u2588' # 0x00db -> FULL BLOCK
+ '\u2584' # 0x00dc -> LOWER HALF BLOCK
+ '\u258c' # 0x00dd -> LEFT HALF BLOCK
+ '\u2590' # 0x00de -> RIGHT HALF BLOCK
+ '\u2580' # 0x00df -> UPPER HALF BLOCK
+ '\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA
+ '\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
+ '\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA
+ '\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI
+ '\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA
+ '\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA
+ '\xb5' # 0x00e6 -> MICRO SIGN
+ '\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU
+ '\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI
+ '\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA
+ '\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA
+ '\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA
+ '\u221e' # 0x00ec -> INFINITY
+ '\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI
+ '\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON
+ '\u2229' # 0x00ef -> INTERSECTION
+ '\u2261' # 0x00f0 -> IDENTICAL TO
+ '\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ '\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
+ '\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
+ '\u2320' # 0x00f4 -> TOP HALF INTEGRAL
+ '\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL
+ '\xf7' # 0x00f6 -> DIVISION SIGN
+ '\u2248' # 0x00f7 -> ALMOST EQUAL TO
+ '\xb0' # 0x00f8 -> DEGREE SIGN
+ '\u2219' # 0x00f9 -> BULLET OPERATOR
+ '\xb7' # 0x00fa -> MIDDLE DOT
+ '\u221a' # 0x00fb -> SQUARE ROOT
+ '\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+ '\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ '\u25a0' # 0x00fe -> BLACK SQUARE
+ '\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
+ 0x00a2: 0x009b, # CENT SIGN
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00bf: 0x00a8, # INVERTED QUESTION MARK
+ 0x00c0: 0x0091, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x00c1: 0x0086, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00c2: 0x008f, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00c3: 0x008e, # LATIN CAPITAL LETTER A WITH TILDE
+ 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x00c8: 0x0092, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00ca: 0x0089, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x00cc: 0x0098, # LATIN CAPITAL LETTER I WITH GRAVE
+ 0x00cd: 0x008b, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00d2: 0x00a9, # LATIN CAPITAL LETTER O WITH GRAVE
+ 0x00d3: 0x009f, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00d4: 0x008c, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00d5: 0x0099, # LATIN CAPITAL LETTER O WITH TILDE
+ 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x00da: 0x0096, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
+ 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
+ 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x00e3: 0x0084, # LATIN SMALL LETTER A WITH TILDE
+ 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE
+ 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
+ 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x00f5: 0x0094, # LATIN SMALL LETTER O WITH TILDE
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA
+ 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA
+ 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA
+ 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI
+ 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA
+ 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA
+ 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA
+ 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON
+ 0x03c0: 0x00e3, # GREEK SMALL LETTER PI
+ 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA
+ 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU
+ 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI
+ 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x20a7: 0x009e, # PESETA SIGN
+ 0x2219: 0x00f9, # BULLET OPERATOR
+ 0x221a: 0x00fb, # SQUARE ROOT
+ 0x221e: 0x00ec, # INFINITY
+ 0x2229: 0x00ef, # INTERSECTION
+ 0x2248: 0x00f7, # ALMOST EQUAL TO
+ 0x2261: 0x00f0, # IDENTICAL TO
+ 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
+ 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
+ 0x2320: 0x00f4, # TOP HALF INTEGRAL
+ 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x258c: 0x00dd, # LEFT HALF BLOCK
+ 0x2590: 0x00de, # RIGHT HALF BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/dist/lib/encodings/cp861.py b/dist/lib/encodings/cp861.py
new file mode 100644
index 0000000..860a05f
--- /dev/null
+++ b/dist/lib/encodings/cp861.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP861.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp861',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
+ 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
+ 0x008b: 0x00d0, # LATIN CAPITAL LETTER ETH
+ 0x008c: 0x00f0, # LATIN SMALL LETTER ETH
+ 0x008d: 0x00de, # LATIN CAPITAL LETTER THORN
+ 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
+ 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
+ 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x0095: 0x00fe, # LATIN SMALL LETTER THORN
+ 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x0097: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE
+ 0x0098: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE
+ 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x009e: 0x20a7, # PESETA SIGN
+ 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
+ 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00a5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00a6: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00a7: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00a8: 0x00bf, # INVERTED QUESTION MARK
+ 0x00a9: 0x2310, # REVERSED NOT SIGN
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x258c, # LEFT HALF BLOCK
+ 0x00de: 0x2590, # RIGHT HALF BLOCK
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
+ 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
+ 0x00e3: 0x03c0, # GREEK SMALL LETTER PI
+ 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
+ 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
+ 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
+ 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
+ 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
+ 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
+ 0x00ec: 0x221e, # INFINITY
+ 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
+ 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
+ 0x00ef: 0x2229, # INTERSECTION
+ 0x00f0: 0x2261, # IDENTICAL TO
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
+ 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
+ 0x00f4: 0x2320, # TOP HALF INTEGRAL
+ 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x2248, # ALMOST EQUAL TO
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x2219, # BULLET OPERATOR
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x221a, # SQUARE ROOT
+ 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x0000 -> NULL
+ '\x01' # 0x0001 -> START OF HEADING
+ '\x02' # 0x0002 -> START OF TEXT
+ '\x03' # 0x0003 -> END OF TEXT
+ '\x04' # 0x0004 -> END OF TRANSMISSION
+ '\x05' # 0x0005 -> ENQUIRY
+ '\x06' # 0x0006 -> ACKNOWLEDGE
+ '\x07' # 0x0007 -> BELL
+ '\x08' # 0x0008 -> BACKSPACE
+ '\t' # 0x0009 -> HORIZONTAL TABULATION
+ '\n' # 0x000a -> LINE FEED
+ '\x0b' # 0x000b -> VERTICAL TABULATION
+ '\x0c' # 0x000c -> FORM FEED
+ '\r' # 0x000d -> CARRIAGE RETURN
+ '\x0e' # 0x000e -> SHIFT OUT
+ '\x0f' # 0x000f -> SHIFT IN
+ '\x10' # 0x0010 -> DATA LINK ESCAPE
+ '\x11' # 0x0011 -> DEVICE CONTROL ONE
+ '\x12' # 0x0012 -> DEVICE CONTROL TWO
+ '\x13' # 0x0013 -> DEVICE CONTROL THREE
+ '\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ '\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ '\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x0018 -> CANCEL
+ '\x19' # 0x0019 -> END OF MEDIUM
+ '\x1a' # 0x001a -> SUBSTITUTE
+ '\x1b' # 0x001b -> ESCAPE
+ '\x1c' # 0x001c -> FILE SEPARATOR
+ '\x1d' # 0x001d -> GROUP SEPARATOR
+ '\x1e' # 0x001e -> RECORD SEPARATOR
+ '\x1f' # 0x001f -> UNIT SEPARATOR
+ ' ' # 0x0020 -> SPACE
+ '!' # 0x0021 -> EXCLAMATION MARK
+ '"' # 0x0022 -> QUOTATION MARK
+ '#' # 0x0023 -> NUMBER SIGN
+ '$' # 0x0024 -> DOLLAR SIGN
+ '%' # 0x0025 -> PERCENT SIGN
+ '&' # 0x0026 -> AMPERSAND
+ "'" # 0x0027 -> APOSTROPHE
+ '(' # 0x0028 -> LEFT PARENTHESIS
+ ')' # 0x0029 -> RIGHT PARENTHESIS
+ '*' # 0x002a -> ASTERISK
+ '+' # 0x002b -> PLUS SIGN
+ ',' # 0x002c -> COMMA
+ '-' # 0x002d -> HYPHEN-MINUS
+ '.' # 0x002e -> FULL STOP
+ '/' # 0x002f -> SOLIDUS
+ '0' # 0x0030 -> DIGIT ZERO
+ '1' # 0x0031 -> DIGIT ONE
+ '2' # 0x0032 -> DIGIT TWO
+ '3' # 0x0033 -> DIGIT THREE
+ '4' # 0x0034 -> DIGIT FOUR
+ '5' # 0x0035 -> DIGIT FIVE
+ '6' # 0x0036 -> DIGIT SIX
+ '7' # 0x0037 -> DIGIT SEVEN
+ '8' # 0x0038 -> DIGIT EIGHT
+ '9' # 0x0039 -> DIGIT NINE
+ ':' # 0x003a -> COLON
+ ';' # 0x003b -> SEMICOLON
+ '<' # 0x003c -> LESS-THAN SIGN
+ '=' # 0x003d -> EQUALS SIGN
+ '>' # 0x003e -> GREATER-THAN SIGN
+ '?' # 0x003f -> QUESTION MARK
+ '@' # 0x0040 -> COMMERCIAL AT
+ 'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ 'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ 'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ 'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ 'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ 'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ 'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ 'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ 'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ 'J' # 0x004a -> LATIN CAPITAL LETTER J
+ 'K' # 0x004b -> LATIN CAPITAL LETTER K
+ 'L' # 0x004c -> LATIN CAPITAL LETTER L
+ 'M' # 0x004d -> LATIN CAPITAL LETTER M
+ 'N' # 0x004e -> LATIN CAPITAL LETTER N
+ 'O' # 0x004f -> LATIN CAPITAL LETTER O
+ 'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ 'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ 'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ 'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ 'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ 'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ 'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ '[' # 0x005b -> LEFT SQUARE BRACKET
+ '\\' # 0x005c -> REVERSE SOLIDUS
+ ']' # 0x005d -> RIGHT SQUARE BRACKET
+ '^' # 0x005e -> CIRCUMFLEX ACCENT
+ '_' # 0x005f -> LOW LINE
+ '`' # 0x0060 -> GRAVE ACCENT
+ 'a' # 0x0061 -> LATIN SMALL LETTER A
+ 'b' # 0x0062 -> LATIN SMALL LETTER B
+ 'c' # 0x0063 -> LATIN SMALL LETTER C
+ 'd' # 0x0064 -> LATIN SMALL LETTER D
+ 'e' # 0x0065 -> LATIN SMALL LETTER E
+ 'f' # 0x0066 -> LATIN SMALL LETTER F
+ 'g' # 0x0067 -> LATIN SMALL LETTER G
+ 'h' # 0x0068 -> LATIN SMALL LETTER H
+ 'i' # 0x0069 -> LATIN SMALL LETTER I
+ 'j' # 0x006a -> LATIN SMALL LETTER J
+ 'k' # 0x006b -> LATIN SMALL LETTER K
+ 'l' # 0x006c -> LATIN SMALL LETTER L
+ 'm' # 0x006d -> LATIN SMALL LETTER M
+ 'n' # 0x006e -> LATIN SMALL LETTER N
+ 'o' # 0x006f -> LATIN SMALL LETTER O
+ 'p' # 0x0070 -> LATIN SMALL LETTER P
+ 'q' # 0x0071 -> LATIN SMALL LETTER Q
+ 'r' # 0x0072 -> LATIN SMALL LETTER R
+ 's' # 0x0073 -> LATIN SMALL LETTER S
+ 't' # 0x0074 -> LATIN SMALL LETTER T
+ 'u' # 0x0075 -> LATIN SMALL LETTER U
+ 'v' # 0x0076 -> LATIN SMALL LETTER V
+ 'w' # 0x0077 -> LATIN SMALL LETTER W
+ 'x' # 0x0078 -> LATIN SMALL LETTER X
+ 'y' # 0x0079 -> LATIN SMALL LETTER Y
+ 'z' # 0x007a -> LATIN SMALL LETTER Z
+ '{' # 0x007b -> LEFT CURLY BRACKET
+ '|' # 0x007c -> VERTICAL LINE
+ '}' # 0x007d -> RIGHT CURLY BRACKET
+ '~' # 0x007e -> TILDE
+ '\x7f' # 0x007f -> DELETE
+ '\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
+ '\xd0' # 0x008b -> LATIN CAPITAL LETTER ETH
+ '\xf0' # 0x008c -> LATIN SMALL LETTER ETH
+ '\xde' # 0x008d -> LATIN CAPITAL LETTER THORN
+ '\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
+ '\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
+ '\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\xfe' # 0x0095 -> LATIN SMALL LETTER THORN
+ '\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\xdd' # 0x0097 -> LATIN CAPITAL LETTER Y WITH ACUTE
+ '\xfd' # 0x0098 -> LATIN SMALL LETTER Y WITH ACUTE
+ '\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE
+ '\xa3' # 0x009c -> POUND SIGN
+ '\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE
+ '\u20a7' # 0x009e -> PESETA SIGN
+ '\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
+ '\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+ '\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ '\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ '\xc1' # 0x00a4 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\xcd' # 0x00a5 -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\xd3' # 0x00a6 -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xda' # 0x00a7 -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\xbf' # 0x00a8 -> INVERTED QUESTION MARK
+ '\u2310' # 0x00a9 -> REVERSED NOT SIGN
+ '\xac' # 0x00aa -> NOT SIGN
+ '\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ '\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ '\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
+ '\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u2591' # 0x00b0 -> LIGHT SHADE
+ '\u2592' # 0x00b1 -> MEDIUM SHADE
+ '\u2593' # 0x00b2 -> DARK SHADE
+ '\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ '\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ '\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ '\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ '\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ '\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ '\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ '\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ '\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ '\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ '\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ '\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ '\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ '\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ '\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ '\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ '\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ '\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ '\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ '\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ '\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ '\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ '\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ '\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ '\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ '\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ '\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ '\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ '\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ '\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ '\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ '\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ '\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ '\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ '\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ '\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ '\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ '\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ '\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ '\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ '\u2588' # 0x00db -> FULL BLOCK
+ '\u2584' # 0x00dc -> LOWER HALF BLOCK
+ '\u258c' # 0x00dd -> LEFT HALF BLOCK
+ '\u2590' # 0x00de -> RIGHT HALF BLOCK
+ '\u2580' # 0x00df -> UPPER HALF BLOCK
+ '\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA
+ '\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
+ '\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA
+ '\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI
+ '\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA
+ '\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA
+ '\xb5' # 0x00e6 -> MICRO SIGN
+ '\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU
+ '\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI
+ '\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA
+ '\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA
+ '\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA
+ '\u221e' # 0x00ec -> INFINITY
+ '\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI
+ '\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON
+ '\u2229' # 0x00ef -> INTERSECTION
+ '\u2261' # 0x00f0 -> IDENTICAL TO
+ '\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ '\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
+ '\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
+ '\u2320' # 0x00f4 -> TOP HALF INTEGRAL
+ '\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL
+ '\xf7' # 0x00f6 -> DIVISION SIGN
+ '\u2248' # 0x00f7 -> ALMOST EQUAL TO
+ '\xb0' # 0x00f8 -> DEGREE SIGN
+ '\u2219' # 0x00f9 -> BULLET OPERATOR
+ '\xb7' # 0x00fa -> MIDDLE DOT
+ '\u221a' # 0x00fb -> SQUARE ROOT
+ '\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+ '\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ '\u25a0' # 0x00fe -> BLACK SQUARE
+ '\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00bf: 0x00a8, # INVERTED QUESTION MARK
+ 0x00c1: 0x00a4, # LATIN CAPITAL LETTER A WITH ACUTE
+ 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
+ 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00cd: 0x00a5, # LATIN CAPITAL LETTER I WITH ACUTE
+ 0x00d0: 0x008b, # LATIN CAPITAL LETTER ETH
+ 0x00d3: 0x00a6, # LATIN CAPITAL LETTER O WITH ACUTE
+ 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x00da: 0x00a7, # LATIN CAPITAL LETTER U WITH ACUTE
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00dd: 0x0097, # LATIN CAPITAL LETTER Y WITH ACUTE
+ 0x00de: 0x008d, # LATIN CAPITAL LETTER THORN
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
+ 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
+ 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
+ 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00f0: 0x008c, # LATIN SMALL LETTER ETH
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x00fd: 0x0098, # LATIN SMALL LETTER Y WITH ACUTE
+ 0x00fe: 0x0095, # LATIN SMALL LETTER THORN
+ 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
+ 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA
+ 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA
+ 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA
+ 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI
+ 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA
+ 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA
+ 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA
+ 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON
+ 0x03c0: 0x00e3, # GREEK SMALL LETTER PI
+ 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA
+ 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU
+ 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI
+ 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x20a7: 0x009e, # PESETA SIGN
+ 0x2219: 0x00f9, # BULLET OPERATOR
+ 0x221a: 0x00fb, # SQUARE ROOT
+ 0x221e: 0x00ec, # INFINITY
+ 0x2229: 0x00ef, # INTERSECTION
+ 0x2248: 0x00f7, # ALMOST EQUAL TO
+ 0x2261: 0x00f0, # IDENTICAL TO
+ 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
+ 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
+ 0x2310: 0x00a9, # REVERSED NOT SIGN
+ 0x2320: 0x00f4, # TOP HALF INTEGRAL
+ 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x258c: 0x00dd, # LEFT HALF BLOCK
+ 0x2590: 0x00de, # RIGHT HALF BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/dist/lib/encodings/cp862.py b/dist/lib/encodings/cp862.py
new file mode 100644
index 0000000..3df22f9
--- /dev/null
+++ b/dist/lib/encodings/cp862.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP862.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp862',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x05d0, # HEBREW LETTER ALEF
+ 0x0081: 0x05d1, # HEBREW LETTER BET
+ 0x0082: 0x05d2, # HEBREW LETTER GIMEL
+ 0x0083: 0x05d3, # HEBREW LETTER DALET
+ 0x0084: 0x05d4, # HEBREW LETTER HE
+ 0x0085: 0x05d5, # HEBREW LETTER VAV
+ 0x0086: 0x05d6, # HEBREW LETTER ZAYIN
+ 0x0087: 0x05d7, # HEBREW LETTER HET
+ 0x0088: 0x05d8, # HEBREW LETTER TET
+ 0x0089: 0x05d9, # HEBREW LETTER YOD
+ 0x008a: 0x05da, # HEBREW LETTER FINAL KAF
+ 0x008b: 0x05db, # HEBREW LETTER KAF
+ 0x008c: 0x05dc, # HEBREW LETTER LAMED
+ 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM
+ 0x008e: 0x05de, # HEBREW LETTER MEM
+ 0x008f: 0x05df, # HEBREW LETTER FINAL NUN
+ 0x0090: 0x05e0, # HEBREW LETTER NUN
+ 0x0091: 0x05e1, # HEBREW LETTER SAMEKH
+ 0x0092: 0x05e2, # HEBREW LETTER AYIN
+ 0x0093: 0x05e3, # HEBREW LETTER FINAL PE
+ 0x0094: 0x05e4, # HEBREW LETTER PE
+ 0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI
+ 0x0096: 0x05e6, # HEBREW LETTER TSADI
+ 0x0097: 0x05e7, # HEBREW LETTER QOF
+ 0x0098: 0x05e8, # HEBREW LETTER RESH
+ 0x0099: 0x05e9, # HEBREW LETTER SHIN
+ 0x009a: 0x05ea, # HEBREW LETTER TAV
+ 0x009b: 0x00a2, # CENT SIGN
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00a5, # YEN SIGN
+ 0x009e: 0x20a7, # PESETA SIGN
+ 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
+ 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
+ 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
+ 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
+ 0x00a8: 0x00bf, # INVERTED QUESTION MARK
+ 0x00a9: 0x2310, # REVERSED NOT SIGN
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x258c, # LEFT HALF BLOCK
+ 0x00de: 0x2590, # RIGHT HALF BLOCK
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN)
+ 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
+ 0x00e3: 0x03c0, # GREEK SMALL LETTER PI
+ 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
+ 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
+ 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
+ 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
+ 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
+ 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
+ 0x00ec: 0x221e, # INFINITY
+ 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
+ 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
+ 0x00ef: 0x2229, # INTERSECTION
+ 0x00f0: 0x2261, # IDENTICAL TO
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
+ 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
+ 0x00f4: 0x2320, # TOP HALF INTEGRAL
+ 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x2248, # ALMOST EQUAL TO
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x2219, # BULLET OPERATOR
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x221a, # SQUARE ROOT
+ 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x0000 -> NULL
+ '\x01' # 0x0001 -> START OF HEADING
+ '\x02' # 0x0002 -> START OF TEXT
+ '\x03' # 0x0003 -> END OF TEXT
+ '\x04' # 0x0004 -> END OF TRANSMISSION
+ '\x05' # 0x0005 -> ENQUIRY
+ '\x06' # 0x0006 -> ACKNOWLEDGE
+ '\x07' # 0x0007 -> BELL
+ '\x08' # 0x0008 -> BACKSPACE
+ '\t' # 0x0009 -> HORIZONTAL TABULATION
+ '\n' # 0x000a -> LINE FEED
+ '\x0b' # 0x000b -> VERTICAL TABULATION
+ '\x0c' # 0x000c -> FORM FEED
+ '\r' # 0x000d -> CARRIAGE RETURN
+ '\x0e' # 0x000e -> SHIFT OUT
+ '\x0f' # 0x000f -> SHIFT IN
+ '\x10' # 0x0010 -> DATA LINK ESCAPE
+ '\x11' # 0x0011 -> DEVICE CONTROL ONE
+ '\x12' # 0x0012 -> DEVICE CONTROL TWO
+ '\x13' # 0x0013 -> DEVICE CONTROL THREE
+ '\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ '\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ '\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x0018 -> CANCEL
+ '\x19' # 0x0019 -> END OF MEDIUM
+ '\x1a' # 0x001a -> SUBSTITUTE
+ '\x1b' # 0x001b -> ESCAPE
+ '\x1c' # 0x001c -> FILE SEPARATOR
+ '\x1d' # 0x001d -> GROUP SEPARATOR
+ '\x1e' # 0x001e -> RECORD SEPARATOR
+ '\x1f' # 0x001f -> UNIT SEPARATOR
+ ' ' # 0x0020 -> SPACE
+ '!' # 0x0021 -> EXCLAMATION MARK
+ '"' # 0x0022 -> QUOTATION MARK
+ '#' # 0x0023 -> NUMBER SIGN
+ '$' # 0x0024 -> DOLLAR SIGN
+ '%' # 0x0025 -> PERCENT SIGN
+ '&' # 0x0026 -> AMPERSAND
+ "'" # 0x0027 -> APOSTROPHE
+ '(' # 0x0028 -> LEFT PARENTHESIS
+ ')' # 0x0029 -> RIGHT PARENTHESIS
+ '*' # 0x002a -> ASTERISK
+ '+' # 0x002b -> PLUS SIGN
+ ',' # 0x002c -> COMMA
+ '-' # 0x002d -> HYPHEN-MINUS
+ '.' # 0x002e -> FULL STOP
+ '/' # 0x002f -> SOLIDUS
+ '0' # 0x0030 -> DIGIT ZERO
+ '1' # 0x0031 -> DIGIT ONE
+ '2' # 0x0032 -> DIGIT TWO
+ '3' # 0x0033 -> DIGIT THREE
+ '4' # 0x0034 -> DIGIT FOUR
+ '5' # 0x0035 -> DIGIT FIVE
+ '6' # 0x0036 -> DIGIT SIX
+ '7' # 0x0037 -> DIGIT SEVEN
+ '8' # 0x0038 -> DIGIT EIGHT
+ '9' # 0x0039 -> DIGIT NINE
+ ':' # 0x003a -> COLON
+ ';' # 0x003b -> SEMICOLON
+ '<' # 0x003c -> LESS-THAN SIGN
+ '=' # 0x003d -> EQUALS SIGN
+ '>' # 0x003e -> GREATER-THAN SIGN
+ '?' # 0x003f -> QUESTION MARK
+ '@' # 0x0040 -> COMMERCIAL AT
+ 'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ 'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ 'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ 'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ 'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ 'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ 'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ 'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ 'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ 'J' # 0x004a -> LATIN CAPITAL LETTER J
+ 'K' # 0x004b -> LATIN CAPITAL LETTER K
+ 'L' # 0x004c -> LATIN CAPITAL LETTER L
+ 'M' # 0x004d -> LATIN CAPITAL LETTER M
+ 'N' # 0x004e -> LATIN CAPITAL LETTER N
+ 'O' # 0x004f -> LATIN CAPITAL LETTER O
+ 'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ 'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ 'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ 'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ 'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ 'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ 'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ '[' # 0x005b -> LEFT SQUARE BRACKET
+ '\\' # 0x005c -> REVERSE SOLIDUS
+ ']' # 0x005d -> RIGHT SQUARE BRACKET
+ '^' # 0x005e -> CIRCUMFLEX ACCENT
+ '_' # 0x005f -> LOW LINE
+ '`' # 0x0060 -> GRAVE ACCENT
+ 'a' # 0x0061 -> LATIN SMALL LETTER A
+ 'b' # 0x0062 -> LATIN SMALL LETTER B
+ 'c' # 0x0063 -> LATIN SMALL LETTER C
+ 'd' # 0x0064 -> LATIN SMALL LETTER D
+ 'e' # 0x0065 -> LATIN SMALL LETTER E
+ 'f' # 0x0066 -> LATIN SMALL LETTER F
+ 'g' # 0x0067 -> LATIN SMALL LETTER G
+ 'h' # 0x0068 -> LATIN SMALL LETTER H
+ 'i' # 0x0069 -> LATIN SMALL LETTER I
+ 'j' # 0x006a -> LATIN SMALL LETTER J
+ 'k' # 0x006b -> LATIN SMALL LETTER K
+ 'l' # 0x006c -> LATIN SMALL LETTER L
+ 'm' # 0x006d -> LATIN SMALL LETTER M
+ 'n' # 0x006e -> LATIN SMALL LETTER N
+ 'o' # 0x006f -> LATIN SMALL LETTER O
+ 'p' # 0x0070 -> LATIN SMALL LETTER P
+ 'q' # 0x0071 -> LATIN SMALL LETTER Q
+ 'r' # 0x0072 -> LATIN SMALL LETTER R
+ 's' # 0x0073 -> LATIN SMALL LETTER S
+ 't' # 0x0074 -> LATIN SMALL LETTER T
+ 'u' # 0x0075 -> LATIN SMALL LETTER U
+ 'v' # 0x0076 -> LATIN SMALL LETTER V
+ 'w' # 0x0077 -> LATIN SMALL LETTER W
+ 'x' # 0x0078 -> LATIN SMALL LETTER X
+ 'y' # 0x0079 -> LATIN SMALL LETTER Y
+ 'z' # 0x007a -> LATIN SMALL LETTER Z
+ '{' # 0x007b -> LEFT CURLY BRACKET
+ '|' # 0x007c -> VERTICAL LINE
+ '}' # 0x007d -> RIGHT CURLY BRACKET
+ '~' # 0x007e -> TILDE
+ '\x7f' # 0x007f -> DELETE
+ '\u05d0' # 0x0080 -> HEBREW LETTER ALEF
+ '\u05d1' # 0x0081 -> HEBREW LETTER BET
+ '\u05d2' # 0x0082 -> HEBREW LETTER GIMEL
+ '\u05d3' # 0x0083 -> HEBREW LETTER DALET
+ '\u05d4' # 0x0084 -> HEBREW LETTER HE
+ '\u05d5' # 0x0085 -> HEBREW LETTER VAV
+ '\u05d6' # 0x0086 -> HEBREW LETTER ZAYIN
+ '\u05d7' # 0x0087 -> HEBREW LETTER HET
+ '\u05d8' # 0x0088 -> HEBREW LETTER TET
+ '\u05d9' # 0x0089 -> HEBREW LETTER YOD
+ '\u05da' # 0x008a -> HEBREW LETTER FINAL KAF
+ '\u05db' # 0x008b -> HEBREW LETTER KAF
+ '\u05dc' # 0x008c -> HEBREW LETTER LAMED
+ '\u05dd' # 0x008d -> HEBREW LETTER FINAL MEM
+ '\u05de' # 0x008e -> HEBREW LETTER MEM
+ '\u05df' # 0x008f -> HEBREW LETTER FINAL NUN
+ '\u05e0' # 0x0090 -> HEBREW LETTER NUN
+ '\u05e1' # 0x0091 -> HEBREW LETTER SAMEKH
+ '\u05e2' # 0x0092 -> HEBREW LETTER AYIN
+ '\u05e3' # 0x0093 -> HEBREW LETTER FINAL PE
+ '\u05e4' # 0x0094 -> HEBREW LETTER PE
+ '\u05e5' # 0x0095 -> HEBREW LETTER FINAL TSADI
+ '\u05e6' # 0x0096 -> HEBREW LETTER TSADI
+ '\u05e7' # 0x0097 -> HEBREW LETTER QOF
+ '\u05e8' # 0x0098 -> HEBREW LETTER RESH
+ '\u05e9' # 0x0099 -> HEBREW LETTER SHIN
+ '\u05ea' # 0x009a -> HEBREW LETTER TAV
+ '\xa2' # 0x009b -> CENT SIGN
+ '\xa3' # 0x009c -> POUND SIGN
+ '\xa5' # 0x009d -> YEN SIGN
+ '\u20a7' # 0x009e -> PESETA SIGN
+ '\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
+ '\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+ '\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ '\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ '\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+ '\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+ '\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR
+ '\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR
+ '\xbf' # 0x00a8 -> INVERTED QUESTION MARK
+ '\u2310' # 0x00a9 -> REVERSED NOT SIGN
+ '\xac' # 0x00aa -> NOT SIGN
+ '\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ '\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ '\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
+ '\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u2591' # 0x00b0 -> LIGHT SHADE
+ '\u2592' # 0x00b1 -> MEDIUM SHADE
+ '\u2593' # 0x00b2 -> DARK SHADE
+ '\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ '\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ '\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ '\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ '\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ '\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ '\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ '\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ '\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ '\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ '\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ '\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ '\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ '\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ '\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ '\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ '\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ '\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ '\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ '\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ '\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ '\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ '\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ '\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ '\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ '\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ '\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ '\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ '\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ '\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ '\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ '\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ '\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ '\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ '\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ '\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ '\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ '\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ '\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ '\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ '\u2588' # 0x00db -> FULL BLOCK
+ '\u2584' # 0x00dc -> LOWER HALF BLOCK
+ '\u258c' # 0x00dd -> LEFT HALF BLOCK
+ '\u2590' # 0x00de -> RIGHT HALF BLOCK
+ '\u2580' # 0x00df -> UPPER HALF BLOCK
+ '\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA
+ '\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN)
+ '\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA
+ '\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI
+ '\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA
+ '\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA
+ '\xb5' # 0x00e6 -> MICRO SIGN
+ '\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU
+ '\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI
+ '\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA
+ '\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA
+ '\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA
+ '\u221e' # 0x00ec -> INFINITY
+ '\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI
+ '\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON
+ '\u2229' # 0x00ef -> INTERSECTION
+ '\u2261' # 0x00f0 -> IDENTICAL TO
+ '\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ '\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
+ '\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
+ '\u2320' # 0x00f4 -> TOP HALF INTEGRAL
+ '\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL
+ '\xf7' # 0x00f6 -> DIVISION SIGN
+ '\u2248' # 0x00f7 -> ALMOST EQUAL TO
+ '\xb0' # 0x00f8 -> DEGREE SIGN
+ '\u2219' # 0x00f9 -> BULLET OPERATOR
+ '\xb7' # 0x00fa -> MIDDLE DOT
+ '\u221a' # 0x00fb -> SQUARE ROOT
+ '\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+ '\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ '\u25a0' # 0x00fe -> BLACK SQUARE
+ '\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
+ 0x00a2: 0x009b, # CENT SIGN
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00a5: 0x009d, # YEN SIGN
+ 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00bf: 0x00a8, # INVERTED QUESTION MARK
+ 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN)
+ 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
+ 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA
+ 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA
+ 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA
+ 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI
+ 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA
+ 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA
+ 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA
+ 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON
+ 0x03c0: 0x00e3, # GREEK SMALL LETTER PI
+ 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA
+ 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU
+ 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI
+ 0x05d0: 0x0080, # HEBREW LETTER ALEF
+ 0x05d1: 0x0081, # HEBREW LETTER BET
+ 0x05d2: 0x0082, # HEBREW LETTER GIMEL
+ 0x05d3: 0x0083, # HEBREW LETTER DALET
+ 0x05d4: 0x0084, # HEBREW LETTER HE
+ 0x05d5: 0x0085, # HEBREW LETTER VAV
+ 0x05d6: 0x0086, # HEBREW LETTER ZAYIN
+ 0x05d7: 0x0087, # HEBREW LETTER HET
+ 0x05d8: 0x0088, # HEBREW LETTER TET
+ 0x05d9: 0x0089, # HEBREW LETTER YOD
+ 0x05da: 0x008a, # HEBREW LETTER FINAL KAF
+ 0x05db: 0x008b, # HEBREW LETTER KAF
+ 0x05dc: 0x008c, # HEBREW LETTER LAMED
+ 0x05dd: 0x008d, # HEBREW LETTER FINAL MEM
+ 0x05de: 0x008e, # HEBREW LETTER MEM
+ 0x05df: 0x008f, # HEBREW LETTER FINAL NUN
+ 0x05e0: 0x0090, # HEBREW LETTER NUN
+ 0x05e1: 0x0091, # HEBREW LETTER SAMEKH
+ 0x05e2: 0x0092, # HEBREW LETTER AYIN
+ 0x05e3: 0x0093, # HEBREW LETTER FINAL PE
+ 0x05e4: 0x0094, # HEBREW LETTER PE
+ 0x05e5: 0x0095, # HEBREW LETTER FINAL TSADI
+ 0x05e6: 0x0096, # HEBREW LETTER TSADI
+ 0x05e7: 0x0097, # HEBREW LETTER QOF
+ 0x05e8: 0x0098, # HEBREW LETTER RESH
+ 0x05e9: 0x0099, # HEBREW LETTER SHIN
+ 0x05ea: 0x009a, # HEBREW LETTER TAV
+ 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x20a7: 0x009e, # PESETA SIGN
+ 0x2219: 0x00f9, # BULLET OPERATOR
+ 0x221a: 0x00fb, # SQUARE ROOT
+ 0x221e: 0x00ec, # INFINITY
+ 0x2229: 0x00ef, # INTERSECTION
+ 0x2248: 0x00f7, # ALMOST EQUAL TO
+ 0x2261: 0x00f0, # IDENTICAL TO
+ 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
+ 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
+ 0x2310: 0x00a9, # REVERSED NOT SIGN
+ 0x2320: 0x00f4, # TOP HALF INTEGRAL
+ 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x258c: 0x00dd, # LEFT HALF BLOCK
+ 0x2590: 0x00de, # RIGHT HALF BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/dist/lib/encodings/cp863.py b/dist/lib/encodings/cp863.py
new file mode 100644
index 0000000..764180b
--- /dev/null
+++ b/dist/lib/encodings/cp863.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP863.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp863',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x0084: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
+ 0x0086: 0x00b6, # PILCROW SIGN
+ 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
+ 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x008d: 0x2017, # DOUBLE LOW LINE
+ 0x008e: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x008f: 0x00a7, # SECTION SIGN
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x0092: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x0094: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x0095: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS
+ 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
+ 0x0098: 0x00a4, # CURRENCY SIGN
+ 0x0099: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x00a2, # CENT SIGN
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x009e: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
+ 0x00a0: 0x00a6, # BROKEN BAR
+ 0x00a1: 0x00b4, # ACUTE ACCENT
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x00a8, # DIAERESIS
+ 0x00a5: 0x00b8, # CEDILLA
+ 0x00a6: 0x00b3, # SUPERSCRIPT THREE
+ 0x00a7: 0x00af, # MACRON
+ 0x00a8: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00a9: 0x2310, # REVERSED NOT SIGN
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x00be, # VULGAR FRACTION THREE QUARTERS
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x258c, # LEFT HALF BLOCK
+ 0x00de: 0x2590, # RIGHT HALF BLOCK
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
+ 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
+ 0x00e3: 0x03c0, # GREEK SMALL LETTER PI
+ 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
+ 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
+ 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
+ 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
+ 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
+ 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
+ 0x00ec: 0x221e, # INFINITY
+ 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
+ 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
+ 0x00ef: 0x2229, # INTERSECTION
+ 0x00f0: 0x2261, # IDENTICAL TO
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
+ 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
+ 0x00f4: 0x2320, # TOP HALF INTEGRAL
+ 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x2248, # ALMOST EQUAL TO
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x2219, # BULLET OPERATOR
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x221a, # SQUARE ROOT
+ 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x0000 -> NULL
+ '\x01' # 0x0001 -> START OF HEADING
+ '\x02' # 0x0002 -> START OF TEXT
+ '\x03' # 0x0003 -> END OF TEXT
+ '\x04' # 0x0004 -> END OF TRANSMISSION
+ '\x05' # 0x0005 -> ENQUIRY
+ '\x06' # 0x0006 -> ACKNOWLEDGE
+ '\x07' # 0x0007 -> BELL
+ '\x08' # 0x0008 -> BACKSPACE
+ '\t' # 0x0009 -> HORIZONTAL TABULATION
+ '\n' # 0x000a -> LINE FEED
+ '\x0b' # 0x000b -> VERTICAL TABULATION
+ '\x0c' # 0x000c -> FORM FEED
+ '\r' # 0x000d -> CARRIAGE RETURN
+ '\x0e' # 0x000e -> SHIFT OUT
+ '\x0f' # 0x000f -> SHIFT IN
+ '\x10' # 0x0010 -> DATA LINK ESCAPE
+ '\x11' # 0x0011 -> DEVICE CONTROL ONE
+ '\x12' # 0x0012 -> DEVICE CONTROL TWO
+ '\x13' # 0x0013 -> DEVICE CONTROL THREE
+ '\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ '\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ '\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x0018 -> CANCEL
+ '\x19' # 0x0019 -> END OF MEDIUM
+ '\x1a' # 0x001a -> SUBSTITUTE
+ '\x1b' # 0x001b -> ESCAPE
+ '\x1c' # 0x001c -> FILE SEPARATOR
+ '\x1d' # 0x001d -> GROUP SEPARATOR
+ '\x1e' # 0x001e -> RECORD SEPARATOR
+ '\x1f' # 0x001f -> UNIT SEPARATOR
+ ' ' # 0x0020 -> SPACE
+ '!' # 0x0021 -> EXCLAMATION MARK
+ '"' # 0x0022 -> QUOTATION MARK
+ '#' # 0x0023 -> NUMBER SIGN
+ '$' # 0x0024 -> DOLLAR SIGN
+ '%' # 0x0025 -> PERCENT SIGN
+ '&' # 0x0026 -> AMPERSAND
+ "'" # 0x0027 -> APOSTROPHE
+ '(' # 0x0028 -> LEFT PARENTHESIS
+ ')' # 0x0029 -> RIGHT PARENTHESIS
+ '*' # 0x002a -> ASTERISK
+ '+' # 0x002b -> PLUS SIGN
+ ',' # 0x002c -> COMMA
+ '-' # 0x002d -> HYPHEN-MINUS
+ '.' # 0x002e -> FULL STOP
+ '/' # 0x002f -> SOLIDUS
+ '0' # 0x0030 -> DIGIT ZERO
+ '1' # 0x0031 -> DIGIT ONE
+ '2' # 0x0032 -> DIGIT TWO
+ '3' # 0x0033 -> DIGIT THREE
+ '4' # 0x0034 -> DIGIT FOUR
+ '5' # 0x0035 -> DIGIT FIVE
+ '6' # 0x0036 -> DIGIT SIX
+ '7' # 0x0037 -> DIGIT SEVEN
+ '8' # 0x0038 -> DIGIT EIGHT
+ '9' # 0x0039 -> DIGIT NINE
+ ':' # 0x003a -> COLON
+ ';' # 0x003b -> SEMICOLON
+ '<' # 0x003c -> LESS-THAN SIGN
+ '=' # 0x003d -> EQUALS SIGN
+ '>' # 0x003e -> GREATER-THAN SIGN
+ '?' # 0x003f -> QUESTION MARK
+ '@' # 0x0040 -> COMMERCIAL AT
+ 'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ 'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ 'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ 'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ 'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ 'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ 'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ 'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ 'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ 'J' # 0x004a -> LATIN CAPITAL LETTER J
+ 'K' # 0x004b -> LATIN CAPITAL LETTER K
+ 'L' # 0x004c -> LATIN CAPITAL LETTER L
+ 'M' # 0x004d -> LATIN CAPITAL LETTER M
+ 'N' # 0x004e -> LATIN CAPITAL LETTER N
+ 'O' # 0x004f -> LATIN CAPITAL LETTER O
+ 'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ 'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ 'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ 'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ 'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ 'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ 'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ '[' # 0x005b -> LEFT SQUARE BRACKET
+ '\\' # 0x005c -> REVERSE SOLIDUS
+ ']' # 0x005d -> RIGHT SQUARE BRACKET
+ '^' # 0x005e -> CIRCUMFLEX ACCENT
+ '_' # 0x005f -> LOW LINE
+ '`' # 0x0060 -> GRAVE ACCENT
+ 'a' # 0x0061 -> LATIN SMALL LETTER A
+ 'b' # 0x0062 -> LATIN SMALL LETTER B
+ 'c' # 0x0063 -> LATIN SMALL LETTER C
+ 'd' # 0x0064 -> LATIN SMALL LETTER D
+ 'e' # 0x0065 -> LATIN SMALL LETTER E
+ 'f' # 0x0066 -> LATIN SMALL LETTER F
+ 'g' # 0x0067 -> LATIN SMALL LETTER G
+ 'h' # 0x0068 -> LATIN SMALL LETTER H
+ 'i' # 0x0069 -> LATIN SMALL LETTER I
+ 'j' # 0x006a -> LATIN SMALL LETTER J
+ 'k' # 0x006b -> LATIN SMALL LETTER K
+ 'l' # 0x006c -> LATIN SMALL LETTER L
+ 'm' # 0x006d -> LATIN SMALL LETTER M
+ 'n' # 0x006e -> LATIN SMALL LETTER N
+ 'o' # 0x006f -> LATIN SMALL LETTER O
+ 'p' # 0x0070 -> LATIN SMALL LETTER P
+ 'q' # 0x0071 -> LATIN SMALL LETTER Q
+ 'r' # 0x0072 -> LATIN SMALL LETTER R
+ 's' # 0x0073 -> LATIN SMALL LETTER S
+ 't' # 0x0074 -> LATIN SMALL LETTER T
+ 'u' # 0x0075 -> LATIN SMALL LETTER U
+ 'v' # 0x0076 -> LATIN SMALL LETTER V
+ 'w' # 0x0077 -> LATIN SMALL LETTER W
+ 'x' # 0x0078 -> LATIN SMALL LETTER X
+ 'y' # 0x0079 -> LATIN SMALL LETTER Y
+ 'z' # 0x007a -> LATIN SMALL LETTER Z
+ '{' # 0x007b -> LEFT CURLY BRACKET
+ '|' # 0x007c -> VERTICAL LINE
+ '}' # 0x007d -> RIGHT CURLY BRACKET
+ '~' # 0x007e -> TILDE
+ '\x7f' # 0x007f -> DELETE
+ '\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\xc2' # 0x0084 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ '\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xb6' # 0x0086 -> PILCROW SIGN
+ '\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
+ '\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\u2017' # 0x008d -> DOUBLE LOW LINE
+ '\xc0' # 0x008e -> LATIN CAPITAL LETTER A WITH GRAVE
+ '\xa7' # 0x008f -> SECTION SIGN
+ '\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xc8' # 0x0091 -> LATIN CAPITAL LETTER E WITH GRAVE
+ '\xca' # 0x0092 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ '\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xcb' # 0x0094 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ '\xcf' # 0x0095 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ '\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+ '\xa4' # 0x0098 -> CURRENCY SIGN
+ '\xd4' # 0x0099 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xa2' # 0x009b -> CENT SIGN
+ '\xa3' # 0x009c -> POUND SIGN
+ '\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE
+ '\xdb' # 0x009e -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ '\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
+ '\xa6' # 0x00a0 -> BROKEN BAR
+ '\xb4' # 0x00a1 -> ACUTE ACCENT
+ '\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ '\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ '\xa8' # 0x00a4 -> DIAERESIS
+ '\xb8' # 0x00a5 -> CEDILLA
+ '\xb3' # 0x00a6 -> SUPERSCRIPT THREE
+ '\xaf' # 0x00a7 -> MACRON
+ '\xce' # 0x00a8 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ '\u2310' # 0x00a9 -> REVERSED NOT SIGN
+ '\xac' # 0x00aa -> NOT SIGN
+ '\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ '\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ '\xbe' # 0x00ad -> VULGAR FRACTION THREE QUARTERS
+ '\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u2591' # 0x00b0 -> LIGHT SHADE
+ '\u2592' # 0x00b1 -> MEDIUM SHADE
+ '\u2593' # 0x00b2 -> DARK SHADE
+ '\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ '\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ '\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ '\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ '\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ '\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ '\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ '\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ '\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ '\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ '\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ '\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ '\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ '\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ '\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ '\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ '\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ '\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ '\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ '\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ '\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ '\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ '\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ '\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ '\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ '\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ '\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ '\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ '\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ '\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ '\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ '\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ '\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ '\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ '\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ '\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ '\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ '\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ '\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ '\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ '\u2588' # 0x00db -> FULL BLOCK
+ '\u2584' # 0x00dc -> LOWER HALF BLOCK
+ '\u258c' # 0x00dd -> LEFT HALF BLOCK
+ '\u2590' # 0x00de -> RIGHT HALF BLOCK
+ '\u2580' # 0x00df -> UPPER HALF BLOCK
+ '\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA
+ '\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
+ '\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA
+ '\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI
+ '\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA
+ '\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA
+ '\xb5' # 0x00e6 -> MICRO SIGN
+ '\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU
+ '\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI
+ '\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA
+ '\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA
+ '\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA
+ '\u221e' # 0x00ec -> INFINITY
+ '\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI
+ '\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON
+ '\u2229' # 0x00ef -> INTERSECTION
+ '\u2261' # 0x00f0 -> IDENTICAL TO
+ '\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ '\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
+ '\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
+ '\u2320' # 0x00f4 -> TOP HALF INTEGRAL
+ '\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL
+ '\xf7' # 0x00f6 -> DIVISION SIGN
+ '\u2248' # 0x00f7 -> ALMOST EQUAL TO
+ '\xb0' # 0x00f8 -> DEGREE SIGN
+ '\u2219' # 0x00f9 -> BULLET OPERATOR
+ '\xb7' # 0x00fa -> MIDDLE DOT
+ '\u221a' # 0x00fb -> SQUARE ROOT
+ '\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+ '\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ '\u25a0' # 0x00fe -> BLACK SQUARE
+ '\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a2: 0x009b, # CENT SIGN
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00a4: 0x0098, # CURRENCY SIGN
+ 0x00a6: 0x00a0, # BROKEN BAR
+ 0x00a7: 0x008f, # SECTION SIGN
+ 0x00a8: 0x00a4, # DIAERESIS
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00af: 0x00a7, # MACRON
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b3: 0x00a6, # SUPERSCRIPT THREE
+ 0x00b4: 0x00a1, # ACUTE ACCENT
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b6: 0x0086, # PILCROW SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00b8: 0x00a5, # CEDILLA
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00be: 0x00ad, # VULGAR FRACTION THREE QUARTERS
+ 0x00c0: 0x008e, # LATIN CAPITAL LETTER A WITH GRAVE
+ 0x00c2: 0x0084, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x00c8: 0x0091, # LATIN CAPITAL LETTER E WITH GRAVE
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00ca: 0x0092, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ 0x00cb: 0x0094, # LATIN CAPITAL LETTER E WITH DIAERESIS
+ 0x00ce: 0x00a8, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ 0x00cf: 0x0095, # LATIN CAPITAL LETTER I WITH DIAERESIS
+ 0x00d4: 0x0099, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE
+ 0x00db: 0x009e, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
+ 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
+ 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
+ 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA
+ 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA
+ 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA
+ 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI
+ 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA
+ 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA
+ 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA
+ 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON
+ 0x03c0: 0x00e3, # GREEK SMALL LETTER PI
+ 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA
+ 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU
+ 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI
+ 0x2017: 0x008d, # DOUBLE LOW LINE
+ 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x2219: 0x00f9, # BULLET OPERATOR
+ 0x221a: 0x00fb, # SQUARE ROOT
+ 0x221e: 0x00ec, # INFINITY
+ 0x2229: 0x00ef, # INTERSECTION
+ 0x2248: 0x00f7, # ALMOST EQUAL TO
+ 0x2261: 0x00f0, # IDENTICAL TO
+ 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
+ 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
+ 0x2310: 0x00a9, # REVERSED NOT SIGN
+ 0x2320: 0x00f4, # TOP HALF INTEGRAL
+ 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x258c: 0x00dd, # LEFT HALF BLOCK
+ 0x2590: 0x00de, # RIGHT HALF BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/dist/lib/encodings/cp864.py b/dist/lib/encodings/cp864.py
new file mode 100644
index 0000000..53df482
--- /dev/null
+++ b/dist/lib/encodings/cp864.py
@@ -0,0 +1,690 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP864.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp864',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0025: 0x066a, # ARABIC PERCENT SIGN
+ 0x0080: 0x00b0, # DEGREE SIGN
+ 0x0081: 0x00b7, # MIDDLE DOT
+ 0x0082: 0x2219, # BULLET OPERATOR
+ 0x0083: 0x221a, # SQUARE ROOT
+ 0x0084: 0x2592, # MEDIUM SHADE
+ 0x0085: 0x2500, # FORMS LIGHT HORIZONTAL
+ 0x0086: 0x2502, # FORMS LIGHT VERTICAL
+ 0x0087: 0x253c, # FORMS LIGHT VERTICAL AND HORIZONTAL
+ 0x0088: 0x2524, # FORMS LIGHT VERTICAL AND LEFT
+ 0x0089: 0x252c, # FORMS LIGHT DOWN AND HORIZONTAL
+ 0x008a: 0x251c, # FORMS LIGHT VERTICAL AND RIGHT
+ 0x008b: 0x2534, # FORMS LIGHT UP AND HORIZONTAL
+ 0x008c: 0x2510, # FORMS LIGHT DOWN AND LEFT
+ 0x008d: 0x250c, # FORMS LIGHT DOWN AND RIGHT
+ 0x008e: 0x2514, # FORMS LIGHT UP AND RIGHT
+ 0x008f: 0x2518, # FORMS LIGHT UP AND LEFT
+ 0x0090: 0x03b2, # GREEK SMALL BETA
+ 0x0091: 0x221e, # INFINITY
+ 0x0092: 0x03c6, # GREEK SMALL PHI
+ 0x0093: 0x00b1, # PLUS-OR-MINUS SIGN
+ 0x0094: 0x00bd, # FRACTION 1/2
+ 0x0095: 0x00bc, # FRACTION 1/4
+ 0x0096: 0x2248, # ALMOST EQUAL TO
+ 0x0097: 0x00ab, # LEFT POINTING GUILLEMET
+ 0x0098: 0x00bb, # RIGHT POINTING GUILLEMET
+ 0x0099: 0xfef7, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM
+ 0x009a: 0xfef8, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM
+ 0x009b: None, # UNDEFINED
+ 0x009c: None, # UNDEFINED
+ 0x009d: 0xfefb, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM
+ 0x009e: 0xfefc, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM
+ 0x009f: None, # UNDEFINED
+ 0x00a1: 0x00ad, # SOFT HYPHEN
+ 0x00a2: 0xfe82, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM
+ 0x00a5: 0xfe84, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM
+ 0x00a6: None, # UNDEFINED
+ 0x00a7: None, # UNDEFINED
+ 0x00a8: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM
+ 0x00a9: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM
+ 0x00aa: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM
+ 0x00ab: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM
+ 0x00ac: 0x060c, # ARABIC COMMA
+ 0x00ad: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM
+ 0x00ae: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM
+ 0x00af: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM
+ 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO
+ 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE
+ 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO
+ 0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE
+ 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR
+ 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE
+ 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX
+ 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN
+ 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT
+ 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE
+ 0x00ba: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM
+ 0x00bb: 0x061b, # ARABIC SEMICOLON
+ 0x00bc: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM
+ 0x00bd: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM
+ 0x00be: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM
+ 0x00bf: 0x061f, # ARABIC QUESTION MARK
+ 0x00c0: 0x00a2, # CENT SIGN
+ 0x00c1: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM
+ 0x00c2: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
+ 0x00c3: 0xfe83, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM
+ 0x00c4: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
+ 0x00c5: 0xfeca, # ARABIC LETTER AIN FINAL FORM
+ 0x00c6: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
+ 0x00c7: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM
+ 0x00c8: 0xfe91, # ARABIC LETTER BEH INITIAL FORM
+ 0x00c9: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM
+ 0x00ca: 0xfe97, # ARABIC LETTER TEH INITIAL FORM
+ 0x00cb: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM
+ 0x00cc: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM
+ 0x00cd: 0xfea3, # ARABIC LETTER HAH INITIAL FORM
+ 0x00ce: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM
+ 0x00cf: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM
+ 0x00d0: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM
+ 0x00d1: 0xfead, # ARABIC LETTER REH ISOLATED FORM
+ 0x00d2: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM
+ 0x00d3: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM
+ 0x00d4: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM
+ 0x00d5: 0xfebb, # ARABIC LETTER SAD INITIAL FORM
+ 0x00d6: 0xfebf, # ARABIC LETTER DAD INITIAL FORM
+ 0x00d7: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM
+ 0x00d8: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM
+ 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM
+ 0x00da: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM
+ 0x00db: 0x00a6, # BROKEN VERTICAL BAR
+ 0x00dc: 0x00ac, # NOT SIGN
+ 0x00dd: 0x00f7, # DIVISION SIGN
+ 0x00de: 0x00d7, # MULTIPLICATION SIGN
+ 0x00df: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM
+ 0x00e0: 0x0640, # ARABIC TATWEEL
+ 0x00e1: 0xfed3, # ARABIC LETTER FEH INITIAL FORM
+ 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM
+ 0x00e3: 0xfedb, # ARABIC LETTER KAF INITIAL FORM
+ 0x00e4: 0xfedf, # ARABIC LETTER LAM INITIAL FORM
+ 0x00e5: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM
+ 0x00e6: 0xfee7, # ARABIC LETTER NOON INITIAL FORM
+ 0x00e7: 0xfeeb, # ARABIC LETTER HEH INITIAL FORM
+ 0x00e8: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM
+ 0x00e9: 0xfeef, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM
+ 0x00ea: 0xfef3, # ARABIC LETTER YEH INITIAL FORM
+ 0x00eb: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM
+ 0x00ec: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM
+ 0x00ed: 0xfece, # ARABIC LETTER GHAIN FINAL FORM
+ 0x00ee: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM
+ 0x00ef: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM
+ 0x00f0: 0xfe7d, # ARABIC SHADDA MEDIAL FORM
+ 0x00f1: 0x0651, # ARABIC SHADDAH
+ 0x00f2: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM
+ 0x00f3: 0xfee9, # ARABIC LETTER HEH ISOLATED FORM
+ 0x00f4: 0xfeec, # ARABIC LETTER HEH MEDIAL FORM
+ 0x00f5: 0xfef0, # ARABIC LETTER ALEF MAKSURA FINAL FORM
+ 0x00f6: 0xfef2, # ARABIC LETTER YEH FINAL FORM
+ 0x00f7: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM
+ 0x00f8: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM
+ 0x00f9: 0xfef5, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM
+ 0x00fa: 0xfef6, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM
+ 0x00fb: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM
+ 0x00fc: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM
+ 0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: None, # UNDEFINED
+})
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x0000 -> NULL
+ '\x01' # 0x0001 -> START OF HEADING
+ '\x02' # 0x0002 -> START OF TEXT
+ '\x03' # 0x0003 -> END OF TEXT
+ '\x04' # 0x0004 -> END OF TRANSMISSION
+ '\x05' # 0x0005 -> ENQUIRY
+ '\x06' # 0x0006 -> ACKNOWLEDGE
+ '\x07' # 0x0007 -> BELL
+ '\x08' # 0x0008 -> BACKSPACE
+ '\t' # 0x0009 -> HORIZONTAL TABULATION
+ '\n' # 0x000a -> LINE FEED
+ '\x0b' # 0x000b -> VERTICAL TABULATION
+ '\x0c' # 0x000c -> FORM FEED
+ '\r' # 0x000d -> CARRIAGE RETURN
+ '\x0e' # 0x000e -> SHIFT OUT
+ '\x0f' # 0x000f -> SHIFT IN
+ '\x10' # 0x0010 -> DATA LINK ESCAPE
+ '\x11' # 0x0011 -> DEVICE CONTROL ONE
+ '\x12' # 0x0012 -> DEVICE CONTROL TWO
+ '\x13' # 0x0013 -> DEVICE CONTROL THREE
+ '\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ '\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ '\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x0018 -> CANCEL
+ '\x19' # 0x0019 -> END OF MEDIUM
+ '\x1a' # 0x001a -> SUBSTITUTE
+ '\x1b' # 0x001b -> ESCAPE
+ '\x1c' # 0x001c -> FILE SEPARATOR
+ '\x1d' # 0x001d -> GROUP SEPARATOR
+ '\x1e' # 0x001e -> RECORD SEPARATOR
+ '\x1f' # 0x001f -> UNIT SEPARATOR
+ ' ' # 0x0020 -> SPACE
+ '!' # 0x0021 -> EXCLAMATION MARK
+ '"' # 0x0022 -> QUOTATION MARK
+ '#' # 0x0023 -> NUMBER SIGN
+ '$' # 0x0024 -> DOLLAR SIGN
+ '\u066a' # 0x0025 -> ARABIC PERCENT SIGN
+ '&' # 0x0026 -> AMPERSAND
+ "'" # 0x0027 -> APOSTROPHE
+ '(' # 0x0028 -> LEFT PARENTHESIS
+ ')' # 0x0029 -> RIGHT PARENTHESIS
+ '*' # 0x002a -> ASTERISK
+ '+' # 0x002b -> PLUS SIGN
+ ',' # 0x002c -> COMMA
+ '-' # 0x002d -> HYPHEN-MINUS
+ '.' # 0x002e -> FULL STOP
+ '/' # 0x002f -> SOLIDUS
+ '0' # 0x0030 -> DIGIT ZERO
+ '1' # 0x0031 -> DIGIT ONE
+ '2' # 0x0032 -> DIGIT TWO
+ '3' # 0x0033 -> DIGIT THREE
+ '4' # 0x0034 -> DIGIT FOUR
+ '5' # 0x0035 -> DIGIT FIVE
+ '6' # 0x0036 -> DIGIT SIX
+ '7' # 0x0037 -> DIGIT SEVEN
+ '8' # 0x0038 -> DIGIT EIGHT
+ '9' # 0x0039 -> DIGIT NINE
+ ':' # 0x003a -> COLON
+ ';' # 0x003b -> SEMICOLON
+ '<' # 0x003c -> LESS-THAN SIGN
+ '=' # 0x003d -> EQUALS SIGN
+ '>' # 0x003e -> GREATER-THAN SIGN
+ '?' # 0x003f -> QUESTION MARK
+ '@' # 0x0040 -> COMMERCIAL AT
+ 'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ 'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ 'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ 'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ 'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ 'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ 'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ 'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ 'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ 'J' # 0x004a -> LATIN CAPITAL LETTER J
+ 'K' # 0x004b -> LATIN CAPITAL LETTER K
+ 'L' # 0x004c -> LATIN CAPITAL LETTER L
+ 'M' # 0x004d -> LATIN CAPITAL LETTER M
+ 'N' # 0x004e -> LATIN CAPITAL LETTER N
+ 'O' # 0x004f -> LATIN CAPITAL LETTER O
+ 'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ 'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ 'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ 'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ 'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ 'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ 'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ '[' # 0x005b -> LEFT SQUARE BRACKET
+ '\\' # 0x005c -> REVERSE SOLIDUS
+ ']' # 0x005d -> RIGHT SQUARE BRACKET
+ '^' # 0x005e -> CIRCUMFLEX ACCENT
+ '_' # 0x005f -> LOW LINE
+ '`' # 0x0060 -> GRAVE ACCENT
+ 'a' # 0x0061 -> LATIN SMALL LETTER A
+ 'b' # 0x0062 -> LATIN SMALL LETTER B
+ 'c' # 0x0063 -> LATIN SMALL LETTER C
+ 'd' # 0x0064 -> LATIN SMALL LETTER D
+ 'e' # 0x0065 -> LATIN SMALL LETTER E
+ 'f' # 0x0066 -> LATIN SMALL LETTER F
+ 'g' # 0x0067 -> LATIN SMALL LETTER G
+ 'h' # 0x0068 -> LATIN SMALL LETTER H
+ 'i' # 0x0069 -> LATIN SMALL LETTER I
+ 'j' # 0x006a -> LATIN SMALL LETTER J
+ 'k' # 0x006b -> LATIN SMALL LETTER K
+ 'l' # 0x006c -> LATIN SMALL LETTER L
+ 'm' # 0x006d -> LATIN SMALL LETTER M
+ 'n' # 0x006e -> LATIN SMALL LETTER N
+ 'o' # 0x006f -> LATIN SMALL LETTER O
+ 'p' # 0x0070 -> LATIN SMALL LETTER P
+ 'q' # 0x0071 -> LATIN SMALL LETTER Q
+ 'r' # 0x0072 -> LATIN SMALL LETTER R
+ 's' # 0x0073 -> LATIN SMALL LETTER S
+ 't' # 0x0074 -> LATIN SMALL LETTER T
+ 'u' # 0x0075 -> LATIN SMALL LETTER U
+ 'v' # 0x0076 -> LATIN SMALL LETTER V
+ 'w' # 0x0077 -> LATIN SMALL LETTER W
+ 'x' # 0x0078 -> LATIN SMALL LETTER X
+ 'y' # 0x0079 -> LATIN SMALL LETTER Y
+ 'z' # 0x007a -> LATIN SMALL LETTER Z
+ '{' # 0x007b -> LEFT CURLY BRACKET
+ '|' # 0x007c -> VERTICAL LINE
+ '}' # 0x007d -> RIGHT CURLY BRACKET
+ '~' # 0x007e -> TILDE
+ '\x7f' # 0x007f -> DELETE
+ '\xb0' # 0x0080 -> DEGREE SIGN
+ '\xb7' # 0x0081 -> MIDDLE DOT
+ '\u2219' # 0x0082 -> BULLET OPERATOR
+ '\u221a' # 0x0083 -> SQUARE ROOT
+ '\u2592' # 0x0084 -> MEDIUM SHADE
+ '\u2500' # 0x0085 -> FORMS LIGHT HORIZONTAL
+ '\u2502' # 0x0086 -> FORMS LIGHT VERTICAL
+ '\u253c' # 0x0087 -> FORMS LIGHT VERTICAL AND HORIZONTAL
+ '\u2524' # 0x0088 -> FORMS LIGHT VERTICAL AND LEFT
+ '\u252c' # 0x0089 -> FORMS LIGHT DOWN AND HORIZONTAL
+ '\u251c' # 0x008a -> FORMS LIGHT VERTICAL AND RIGHT
+ '\u2534' # 0x008b -> FORMS LIGHT UP AND HORIZONTAL
+ '\u2510' # 0x008c -> FORMS LIGHT DOWN AND LEFT
+ '\u250c' # 0x008d -> FORMS LIGHT DOWN AND RIGHT
+ '\u2514' # 0x008e -> FORMS LIGHT UP AND RIGHT
+ '\u2518' # 0x008f -> FORMS LIGHT UP AND LEFT
+ '\u03b2' # 0x0090 -> GREEK SMALL BETA
+ '\u221e' # 0x0091 -> INFINITY
+ '\u03c6' # 0x0092 -> GREEK SMALL PHI
+ '\xb1' # 0x0093 -> PLUS-OR-MINUS SIGN
+ '\xbd' # 0x0094 -> FRACTION 1/2
+ '\xbc' # 0x0095 -> FRACTION 1/4
+ '\u2248' # 0x0096 -> ALMOST EQUAL TO
+ '\xab' # 0x0097 -> LEFT POINTING GUILLEMET
+ '\xbb' # 0x0098 -> RIGHT POINTING GUILLEMET
+ '\ufef7' # 0x0099 -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM
+ '\ufef8' # 0x009a -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM
+ '\ufffe' # 0x009b -> UNDEFINED
+ '\ufffe' # 0x009c -> UNDEFINED
+ '\ufefb' # 0x009d -> ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM
+ '\ufefc' # 0x009e -> ARABIC LIGATURE LAM WITH ALEF FINAL FORM
+ '\ufffe' # 0x009f -> UNDEFINED
+ '\xa0' # 0x00a0 -> NON-BREAKING SPACE
+ '\xad' # 0x00a1 -> SOFT HYPHEN
+ '\ufe82' # 0x00a2 -> ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM
+ '\xa3' # 0x00a3 -> POUND SIGN
+ '\xa4' # 0x00a4 -> CURRENCY SIGN
+ '\ufe84' # 0x00a5 -> ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM
+ '\ufffe' # 0x00a6 -> UNDEFINED
+ '\ufffe' # 0x00a7 -> UNDEFINED
+ '\ufe8e' # 0x00a8 -> ARABIC LETTER ALEF FINAL FORM
+ '\ufe8f' # 0x00a9 -> ARABIC LETTER BEH ISOLATED FORM
+ '\ufe95' # 0x00aa -> ARABIC LETTER TEH ISOLATED FORM
+ '\ufe99' # 0x00ab -> ARABIC LETTER THEH ISOLATED FORM
+ '\u060c' # 0x00ac -> ARABIC COMMA
+ '\ufe9d' # 0x00ad -> ARABIC LETTER JEEM ISOLATED FORM
+ '\ufea1' # 0x00ae -> ARABIC LETTER HAH ISOLATED FORM
+ '\ufea5' # 0x00af -> ARABIC LETTER KHAH ISOLATED FORM
+ '\u0660' # 0x00b0 -> ARABIC-INDIC DIGIT ZERO
+ '\u0661' # 0x00b1 -> ARABIC-INDIC DIGIT ONE
+ '\u0662' # 0x00b2 -> ARABIC-INDIC DIGIT TWO
+ '\u0663' # 0x00b3 -> ARABIC-INDIC DIGIT THREE
+ '\u0664' # 0x00b4 -> ARABIC-INDIC DIGIT FOUR
+ '\u0665' # 0x00b5 -> ARABIC-INDIC DIGIT FIVE
+ '\u0666' # 0x00b6 -> ARABIC-INDIC DIGIT SIX
+ '\u0667' # 0x00b7 -> ARABIC-INDIC DIGIT SEVEN
+ '\u0668' # 0x00b8 -> ARABIC-INDIC DIGIT EIGHT
+ '\u0669' # 0x00b9 -> ARABIC-INDIC DIGIT NINE
+ '\ufed1' # 0x00ba -> ARABIC LETTER FEH ISOLATED FORM
+ '\u061b' # 0x00bb -> ARABIC SEMICOLON
+ '\ufeb1' # 0x00bc -> ARABIC LETTER SEEN ISOLATED FORM
+ '\ufeb5' # 0x00bd -> ARABIC LETTER SHEEN ISOLATED FORM
+ '\ufeb9' # 0x00be -> ARABIC LETTER SAD ISOLATED FORM
+ '\u061f' # 0x00bf -> ARABIC QUESTION MARK
+ '\xa2' # 0x00c0 -> CENT SIGN
+ '\ufe80' # 0x00c1 -> ARABIC LETTER HAMZA ISOLATED FORM
+ '\ufe81' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
+ '\ufe83' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM
+ '\ufe85' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
+ '\ufeca' # 0x00c5 -> ARABIC LETTER AIN FINAL FORM
+ '\ufe8b' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
+ '\ufe8d' # 0x00c7 -> ARABIC LETTER ALEF ISOLATED FORM
+ '\ufe91' # 0x00c8 -> ARABIC LETTER BEH INITIAL FORM
+ '\ufe93' # 0x00c9 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM
+ '\ufe97' # 0x00ca -> ARABIC LETTER TEH INITIAL FORM
+ '\ufe9b' # 0x00cb -> ARABIC LETTER THEH INITIAL FORM
+ '\ufe9f' # 0x00cc -> ARABIC LETTER JEEM INITIAL FORM
+ '\ufea3' # 0x00cd -> ARABIC LETTER HAH INITIAL FORM
+ '\ufea7' # 0x00ce -> ARABIC LETTER KHAH INITIAL FORM
+ '\ufea9' # 0x00cf -> ARABIC LETTER DAL ISOLATED FORM
+ '\ufeab' # 0x00d0 -> ARABIC LETTER THAL ISOLATED FORM
+ '\ufead' # 0x00d1 -> ARABIC LETTER REH ISOLATED FORM
+ '\ufeaf' # 0x00d2 -> ARABIC LETTER ZAIN ISOLATED FORM
+ '\ufeb3' # 0x00d3 -> ARABIC LETTER SEEN INITIAL FORM
+ '\ufeb7' # 0x00d4 -> ARABIC LETTER SHEEN INITIAL FORM
+ '\ufebb' # 0x00d5 -> ARABIC LETTER SAD INITIAL FORM
+ '\ufebf' # 0x00d6 -> ARABIC LETTER DAD INITIAL FORM
+ '\ufec1' # 0x00d7 -> ARABIC LETTER TAH ISOLATED FORM
+ '\ufec5' # 0x00d8 -> ARABIC LETTER ZAH ISOLATED FORM
+ '\ufecb' # 0x00d9 -> ARABIC LETTER AIN INITIAL FORM
+ '\ufecf' # 0x00da -> ARABIC LETTER GHAIN INITIAL FORM
+ '\xa6' # 0x00db -> BROKEN VERTICAL BAR
+ '\xac' # 0x00dc -> NOT SIGN
+ '\xf7' # 0x00dd -> DIVISION SIGN
+ '\xd7' # 0x00de -> MULTIPLICATION SIGN
+ '\ufec9' # 0x00df -> ARABIC LETTER AIN ISOLATED FORM
+ '\u0640' # 0x00e0 -> ARABIC TATWEEL
+ '\ufed3' # 0x00e1 -> ARABIC LETTER FEH INITIAL FORM
+ '\ufed7' # 0x00e2 -> ARABIC LETTER QAF INITIAL FORM
+ '\ufedb' # 0x00e3 -> ARABIC LETTER KAF INITIAL FORM
+ '\ufedf' # 0x00e4 -> ARABIC LETTER LAM INITIAL FORM
+ '\ufee3' # 0x00e5 -> ARABIC LETTER MEEM INITIAL FORM
+ '\ufee7' # 0x00e6 -> ARABIC LETTER NOON INITIAL FORM
+ '\ufeeb' # 0x00e7 -> ARABIC LETTER HEH INITIAL FORM
+ '\ufeed' # 0x00e8 -> ARABIC LETTER WAW ISOLATED FORM
+ '\ufeef' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA ISOLATED FORM
+ '\ufef3' # 0x00ea -> ARABIC LETTER YEH INITIAL FORM
+ '\ufebd' # 0x00eb -> ARABIC LETTER DAD ISOLATED FORM
+ '\ufecc' # 0x00ec -> ARABIC LETTER AIN MEDIAL FORM
+ '\ufece' # 0x00ed -> ARABIC LETTER GHAIN FINAL FORM
+ '\ufecd' # 0x00ee -> ARABIC LETTER GHAIN ISOLATED FORM
+ '\ufee1' # 0x00ef -> ARABIC LETTER MEEM ISOLATED FORM
+ '\ufe7d' # 0x00f0 -> ARABIC SHADDA MEDIAL FORM
+ '\u0651' # 0x00f1 -> ARABIC SHADDAH
+ '\ufee5' # 0x00f2 -> ARABIC LETTER NOON ISOLATED FORM
+ '\ufee9' # 0x00f3 -> ARABIC LETTER HEH ISOLATED FORM
+ '\ufeec' # 0x00f4 -> ARABIC LETTER HEH MEDIAL FORM
+ '\ufef0' # 0x00f5 -> ARABIC LETTER ALEF MAKSURA FINAL FORM
+ '\ufef2' # 0x00f6 -> ARABIC LETTER YEH FINAL FORM
+ '\ufed0' # 0x00f7 -> ARABIC LETTER GHAIN MEDIAL FORM
+ '\ufed5' # 0x00f8 -> ARABIC LETTER QAF ISOLATED FORM
+ '\ufef5' # 0x00f9 -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM
+ '\ufef6' # 0x00fa -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM
+ '\ufedd' # 0x00fb -> ARABIC LETTER LAM ISOLATED FORM
+ '\ufed9' # 0x00fc -> ARABIC LETTER KAF ISOLATED FORM
+ '\ufef1' # 0x00fd -> ARABIC LETTER YEH ISOLATED FORM
+ '\u25a0' # 0x00fe -> BLACK SQUARE
+ '\ufffe' # 0x00ff -> UNDEFINED
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00a0, # NON-BREAKING SPACE
+ 0x00a2: 0x00c0, # CENT SIGN
+ 0x00a3: 0x00a3, # POUND SIGN
+ 0x00a4: 0x00a4, # CURRENCY SIGN
+ 0x00a6: 0x00db, # BROKEN VERTICAL BAR
+ 0x00ab: 0x0097, # LEFT POINTING GUILLEMET
+ 0x00ac: 0x00dc, # NOT SIGN
+ 0x00ad: 0x00a1, # SOFT HYPHEN
+ 0x00b0: 0x0080, # DEGREE SIGN
+ 0x00b1: 0x0093, # PLUS-OR-MINUS SIGN
+ 0x00b7: 0x0081, # MIDDLE DOT
+ 0x00bb: 0x0098, # RIGHT POINTING GUILLEMET
+ 0x00bc: 0x0095, # FRACTION 1/4
+ 0x00bd: 0x0094, # FRACTION 1/2
+ 0x00d7: 0x00de, # MULTIPLICATION SIGN
+ 0x00f7: 0x00dd, # DIVISION SIGN
+ 0x03b2: 0x0090, # GREEK SMALL BETA
+ 0x03c6: 0x0092, # GREEK SMALL PHI
+ 0x060c: 0x00ac, # ARABIC COMMA
+ 0x061b: 0x00bb, # ARABIC SEMICOLON
+ 0x061f: 0x00bf, # ARABIC QUESTION MARK
+ 0x0640: 0x00e0, # ARABIC TATWEEL
+ 0x0651: 0x00f1, # ARABIC SHADDAH
+ 0x0660: 0x00b0, # ARABIC-INDIC DIGIT ZERO
+ 0x0661: 0x00b1, # ARABIC-INDIC DIGIT ONE
+ 0x0662: 0x00b2, # ARABIC-INDIC DIGIT TWO
+ 0x0663: 0x00b3, # ARABIC-INDIC DIGIT THREE
+ 0x0664: 0x00b4, # ARABIC-INDIC DIGIT FOUR
+ 0x0665: 0x00b5, # ARABIC-INDIC DIGIT FIVE
+ 0x0666: 0x00b6, # ARABIC-INDIC DIGIT SIX
+ 0x0667: 0x00b7, # ARABIC-INDIC DIGIT SEVEN
+ 0x0668: 0x00b8, # ARABIC-INDIC DIGIT EIGHT
+ 0x0669: 0x00b9, # ARABIC-INDIC DIGIT NINE
+ 0x066a: 0x0025, # ARABIC PERCENT SIGN
+ 0x2219: 0x0082, # BULLET OPERATOR
+ 0x221a: 0x0083, # SQUARE ROOT
+ 0x221e: 0x0091, # INFINITY
+ 0x2248: 0x0096, # ALMOST EQUAL TO
+ 0x2500: 0x0085, # FORMS LIGHT HORIZONTAL
+ 0x2502: 0x0086, # FORMS LIGHT VERTICAL
+ 0x250c: 0x008d, # FORMS LIGHT DOWN AND RIGHT
+ 0x2510: 0x008c, # FORMS LIGHT DOWN AND LEFT
+ 0x2514: 0x008e, # FORMS LIGHT UP AND RIGHT
+ 0x2518: 0x008f, # FORMS LIGHT UP AND LEFT
+ 0x251c: 0x008a, # FORMS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x0088, # FORMS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x0089, # FORMS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x008b, # FORMS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x0087, # FORMS LIGHT VERTICAL AND HORIZONTAL
+ 0x2592: 0x0084, # MEDIUM SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+ 0xfe7d: 0x00f0, # ARABIC SHADDA MEDIAL FORM
+ 0xfe80: 0x00c1, # ARABIC LETTER HAMZA ISOLATED FORM
+ 0xfe81: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
+ 0xfe82: 0x00a2, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM
+ 0xfe83: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM
+ 0xfe84: 0x00a5, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM
+ 0xfe85: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
+ 0xfe8b: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
+ 0xfe8d: 0x00c7, # ARABIC LETTER ALEF ISOLATED FORM
+ 0xfe8e: 0x00a8, # ARABIC LETTER ALEF FINAL FORM
+ 0xfe8f: 0x00a9, # ARABIC LETTER BEH ISOLATED FORM
+ 0xfe91: 0x00c8, # ARABIC LETTER BEH INITIAL FORM
+ 0xfe93: 0x00c9, # ARABIC LETTER TEH MARBUTA ISOLATED FORM
+ 0xfe95: 0x00aa, # ARABIC LETTER TEH ISOLATED FORM
+ 0xfe97: 0x00ca, # ARABIC LETTER TEH INITIAL FORM
+ 0xfe99: 0x00ab, # ARABIC LETTER THEH ISOLATED FORM
+ 0xfe9b: 0x00cb, # ARABIC LETTER THEH INITIAL FORM
+ 0xfe9d: 0x00ad, # ARABIC LETTER JEEM ISOLATED FORM
+ 0xfe9f: 0x00cc, # ARABIC LETTER JEEM INITIAL FORM
+ 0xfea1: 0x00ae, # ARABIC LETTER HAH ISOLATED FORM
+ 0xfea3: 0x00cd, # ARABIC LETTER HAH INITIAL FORM
+ 0xfea5: 0x00af, # ARABIC LETTER KHAH ISOLATED FORM
+ 0xfea7: 0x00ce, # ARABIC LETTER KHAH INITIAL FORM
+ 0xfea9: 0x00cf, # ARABIC LETTER DAL ISOLATED FORM
+ 0xfeab: 0x00d0, # ARABIC LETTER THAL ISOLATED FORM
+ 0xfead: 0x00d1, # ARABIC LETTER REH ISOLATED FORM
+ 0xfeaf: 0x00d2, # ARABIC LETTER ZAIN ISOLATED FORM
+ 0xfeb1: 0x00bc, # ARABIC LETTER SEEN ISOLATED FORM
+ 0xfeb3: 0x00d3, # ARABIC LETTER SEEN INITIAL FORM
+ 0xfeb5: 0x00bd, # ARABIC LETTER SHEEN ISOLATED FORM
+ 0xfeb7: 0x00d4, # ARABIC LETTER SHEEN INITIAL FORM
+ 0xfeb9: 0x00be, # ARABIC LETTER SAD ISOLATED FORM
+ 0xfebb: 0x00d5, # ARABIC LETTER SAD INITIAL FORM
+ 0xfebd: 0x00eb, # ARABIC LETTER DAD ISOLATED FORM
+ 0xfebf: 0x00d6, # ARABIC LETTER DAD INITIAL FORM
+ 0xfec1: 0x00d7, # ARABIC LETTER TAH ISOLATED FORM
+ 0xfec5: 0x00d8, # ARABIC LETTER ZAH ISOLATED FORM
+ 0xfec9: 0x00df, # ARABIC LETTER AIN ISOLATED FORM
+ 0xfeca: 0x00c5, # ARABIC LETTER AIN FINAL FORM
+ 0xfecb: 0x00d9, # ARABIC LETTER AIN INITIAL FORM
+ 0xfecc: 0x00ec, # ARABIC LETTER AIN MEDIAL FORM
+ 0xfecd: 0x00ee, # ARABIC LETTER GHAIN ISOLATED FORM
+ 0xfece: 0x00ed, # ARABIC LETTER GHAIN FINAL FORM
+ 0xfecf: 0x00da, # ARABIC LETTER GHAIN INITIAL FORM
+ 0xfed0: 0x00f7, # ARABIC LETTER GHAIN MEDIAL FORM
+ 0xfed1: 0x00ba, # ARABIC LETTER FEH ISOLATED FORM
+ 0xfed3: 0x00e1, # ARABIC LETTER FEH INITIAL FORM
+ 0xfed5: 0x00f8, # ARABIC LETTER QAF ISOLATED FORM
+ 0xfed7: 0x00e2, # ARABIC LETTER QAF INITIAL FORM
+ 0xfed9: 0x00fc, # ARABIC LETTER KAF ISOLATED FORM
+ 0xfedb: 0x00e3, # ARABIC LETTER KAF INITIAL FORM
+ 0xfedd: 0x00fb, # ARABIC LETTER LAM ISOLATED FORM
+ 0xfedf: 0x00e4, # ARABIC LETTER LAM INITIAL FORM
+ 0xfee1: 0x00ef, # ARABIC LETTER MEEM ISOLATED FORM
+ 0xfee3: 0x00e5, # ARABIC LETTER MEEM INITIAL FORM
+ 0xfee5: 0x00f2, # ARABIC LETTER NOON ISOLATED FORM
+ 0xfee7: 0x00e6, # ARABIC LETTER NOON INITIAL FORM
+ 0xfee9: 0x00f3, # ARABIC LETTER HEH ISOLATED FORM
+ 0xfeeb: 0x00e7, # ARABIC LETTER HEH INITIAL FORM
+ 0xfeec: 0x00f4, # ARABIC LETTER HEH MEDIAL FORM
+ 0xfeed: 0x00e8, # ARABIC LETTER WAW ISOLATED FORM
+ 0xfeef: 0x00e9, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM
+ 0xfef0: 0x00f5, # ARABIC LETTER ALEF MAKSURA FINAL FORM
+ 0xfef1: 0x00fd, # ARABIC LETTER YEH ISOLATED FORM
+ 0xfef2: 0x00f6, # ARABIC LETTER YEH FINAL FORM
+ 0xfef3: 0x00ea, # ARABIC LETTER YEH INITIAL FORM
+ 0xfef5: 0x00f9, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM
+ 0xfef6: 0x00fa, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM
+ 0xfef7: 0x0099, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM
+ 0xfef8: 0x009a, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM
+ 0xfefb: 0x009d, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM
+ 0xfefc: 0x009e, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM
+}
diff --git a/dist/lib/encodings/cp865.py b/dist/lib/encodings/cp865.py
new file mode 100644
index 0000000..6726cf3
--- /dev/null
+++ b/dist/lib/encodings/cp865.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP865.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp865',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE
+ 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE
+ 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE
+ 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE
+ 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE
+ 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE
+ 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE
+ 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE
+ 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x009e: 0x20a7, # PESETA SIGN
+ 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK
+ 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE
+ 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR
+ 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR
+ 0x00a8: 0x00bf, # INVERTED QUESTION MARK
+ 0x00a9: 0x2310, # REVERSED NOT SIGN
+ 0x00aa: 0x00ac, # NOT SIGN
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER
+ 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00a4, # CURRENCY SIGN
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x258c, # LEFT HALF BLOCK
+ 0x00de: 0x2590, # RIGHT HALF BLOCK
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA
+ 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S
+ 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA
+ 0x00e3: 0x03c0, # GREEK SMALL LETTER PI
+ 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA
+ 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA
+ 0x00e6: 0x00b5, # MICRO SIGN
+ 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU
+ 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI
+ 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA
+ 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA
+ 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA
+ 0x00ec: 0x221e, # INFINITY
+ 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI
+ 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON
+ 0x00ef: 0x2229, # INTERSECTION
+ 0x00f0: 0x2261, # IDENTICAL TO
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO
+ 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO
+ 0x00f4: 0x2320, # TOP HALF INTEGRAL
+ 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL
+ 0x00f6: 0x00f7, # DIVISION SIGN
+ 0x00f7: 0x2248, # ALMOST EQUAL TO
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x2219, # BULLET OPERATOR
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x221a, # SQUARE ROOT
+ 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x00fd: 0x00b2, # SUPERSCRIPT TWO
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x0000 -> NULL
+ '\x01' # 0x0001 -> START OF HEADING
+ '\x02' # 0x0002 -> START OF TEXT
+ '\x03' # 0x0003 -> END OF TEXT
+ '\x04' # 0x0004 -> END OF TRANSMISSION
+ '\x05' # 0x0005 -> ENQUIRY
+ '\x06' # 0x0006 -> ACKNOWLEDGE
+ '\x07' # 0x0007 -> BELL
+ '\x08' # 0x0008 -> BACKSPACE
+ '\t' # 0x0009 -> HORIZONTAL TABULATION
+ '\n' # 0x000a -> LINE FEED
+ '\x0b' # 0x000b -> VERTICAL TABULATION
+ '\x0c' # 0x000c -> FORM FEED
+ '\r' # 0x000d -> CARRIAGE RETURN
+ '\x0e' # 0x000e -> SHIFT OUT
+ '\x0f' # 0x000f -> SHIFT IN
+ '\x10' # 0x0010 -> DATA LINK ESCAPE
+ '\x11' # 0x0011 -> DEVICE CONTROL ONE
+ '\x12' # 0x0012 -> DEVICE CONTROL TWO
+ '\x13' # 0x0013 -> DEVICE CONTROL THREE
+ '\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ '\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ '\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x0018 -> CANCEL
+ '\x19' # 0x0019 -> END OF MEDIUM
+ '\x1a' # 0x001a -> SUBSTITUTE
+ '\x1b' # 0x001b -> ESCAPE
+ '\x1c' # 0x001c -> FILE SEPARATOR
+ '\x1d' # 0x001d -> GROUP SEPARATOR
+ '\x1e' # 0x001e -> RECORD SEPARATOR
+ '\x1f' # 0x001f -> UNIT SEPARATOR
+ ' ' # 0x0020 -> SPACE
+ '!' # 0x0021 -> EXCLAMATION MARK
+ '"' # 0x0022 -> QUOTATION MARK
+ '#' # 0x0023 -> NUMBER SIGN
+ '$' # 0x0024 -> DOLLAR SIGN
+ '%' # 0x0025 -> PERCENT SIGN
+ '&' # 0x0026 -> AMPERSAND
+ "'" # 0x0027 -> APOSTROPHE
+ '(' # 0x0028 -> LEFT PARENTHESIS
+ ')' # 0x0029 -> RIGHT PARENTHESIS
+ '*' # 0x002a -> ASTERISK
+ '+' # 0x002b -> PLUS SIGN
+ ',' # 0x002c -> COMMA
+ '-' # 0x002d -> HYPHEN-MINUS
+ '.' # 0x002e -> FULL STOP
+ '/' # 0x002f -> SOLIDUS
+ '0' # 0x0030 -> DIGIT ZERO
+ '1' # 0x0031 -> DIGIT ONE
+ '2' # 0x0032 -> DIGIT TWO
+ '3' # 0x0033 -> DIGIT THREE
+ '4' # 0x0034 -> DIGIT FOUR
+ '5' # 0x0035 -> DIGIT FIVE
+ '6' # 0x0036 -> DIGIT SIX
+ '7' # 0x0037 -> DIGIT SEVEN
+ '8' # 0x0038 -> DIGIT EIGHT
+ '9' # 0x0039 -> DIGIT NINE
+ ':' # 0x003a -> COLON
+ ';' # 0x003b -> SEMICOLON
+ '<' # 0x003c -> LESS-THAN SIGN
+ '=' # 0x003d -> EQUALS SIGN
+ '>' # 0x003e -> GREATER-THAN SIGN
+ '?' # 0x003f -> QUESTION MARK
+ '@' # 0x0040 -> COMMERCIAL AT
+ 'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ 'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ 'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ 'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ 'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ 'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ 'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ 'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ 'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ 'J' # 0x004a -> LATIN CAPITAL LETTER J
+ 'K' # 0x004b -> LATIN CAPITAL LETTER K
+ 'L' # 0x004c -> LATIN CAPITAL LETTER L
+ 'M' # 0x004d -> LATIN CAPITAL LETTER M
+ 'N' # 0x004e -> LATIN CAPITAL LETTER N
+ 'O' # 0x004f -> LATIN CAPITAL LETTER O
+ 'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ 'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ 'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ 'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ 'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ 'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ 'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ '[' # 0x005b -> LEFT SQUARE BRACKET
+ '\\' # 0x005c -> REVERSE SOLIDUS
+ ']' # 0x005d -> RIGHT SQUARE BRACKET
+ '^' # 0x005e -> CIRCUMFLEX ACCENT
+ '_' # 0x005f -> LOW LINE
+ '`' # 0x0060 -> GRAVE ACCENT
+ 'a' # 0x0061 -> LATIN SMALL LETTER A
+ 'b' # 0x0062 -> LATIN SMALL LETTER B
+ 'c' # 0x0063 -> LATIN SMALL LETTER C
+ 'd' # 0x0064 -> LATIN SMALL LETTER D
+ 'e' # 0x0065 -> LATIN SMALL LETTER E
+ 'f' # 0x0066 -> LATIN SMALL LETTER F
+ 'g' # 0x0067 -> LATIN SMALL LETTER G
+ 'h' # 0x0068 -> LATIN SMALL LETTER H
+ 'i' # 0x0069 -> LATIN SMALL LETTER I
+ 'j' # 0x006a -> LATIN SMALL LETTER J
+ 'k' # 0x006b -> LATIN SMALL LETTER K
+ 'l' # 0x006c -> LATIN SMALL LETTER L
+ 'm' # 0x006d -> LATIN SMALL LETTER M
+ 'n' # 0x006e -> LATIN SMALL LETTER N
+ 'o' # 0x006f -> LATIN SMALL LETTER O
+ 'p' # 0x0070 -> LATIN SMALL LETTER P
+ 'q' # 0x0071 -> LATIN SMALL LETTER Q
+ 'r' # 0x0072 -> LATIN SMALL LETTER R
+ 's' # 0x0073 -> LATIN SMALL LETTER S
+ 't' # 0x0074 -> LATIN SMALL LETTER T
+ 'u' # 0x0075 -> LATIN SMALL LETTER U
+ 'v' # 0x0076 -> LATIN SMALL LETTER V
+ 'w' # 0x0077 -> LATIN SMALL LETTER W
+ 'x' # 0x0078 -> LATIN SMALL LETTER X
+ 'y' # 0x0079 -> LATIN SMALL LETTER Y
+ 'z' # 0x007a -> LATIN SMALL LETTER Z
+ '{' # 0x007b -> LEFT CURLY BRACKET
+ '|' # 0x007c -> VERTICAL LINE
+ '}' # 0x007d -> RIGHT CURLY BRACKET
+ '~' # 0x007e -> TILDE
+ '\x7f' # 0x007f -> DELETE
+ '\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE
+ '\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE
+ '\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE
+ '\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE
+ '\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE
+ '\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+ '\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS
+ '\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE
+ '\xa3' # 0x009c -> POUND SIGN
+ '\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE
+ '\u20a7' # 0x009e -> PESETA SIGN
+ '\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK
+ '\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+ '\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+ '\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+ '\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+ '\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+ '\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR
+ '\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR
+ '\xbf' # 0x00a8 -> INVERTED QUESTION MARK
+ '\u2310' # 0x00a9 -> REVERSED NOT SIGN
+ '\xac' # 0x00aa -> NOT SIGN
+ '\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ '\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER
+ '\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK
+ '\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xa4' # 0x00af -> CURRENCY SIGN
+ '\u2591' # 0x00b0 -> LIGHT SHADE
+ '\u2592' # 0x00b1 -> MEDIUM SHADE
+ '\u2593' # 0x00b2 -> DARK SHADE
+ '\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ '\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ '\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ '\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ '\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ '\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ '\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ '\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ '\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ '\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ '\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ '\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ '\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ '\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ '\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ '\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ '\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ '\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ '\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ '\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ '\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ '\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ '\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ '\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ '\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ '\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ '\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ '\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ '\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ '\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ '\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ '\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ '\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ '\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ '\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ '\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ '\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ '\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ '\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ '\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ '\u2588' # 0x00db -> FULL BLOCK
+ '\u2584' # 0x00dc -> LOWER HALF BLOCK
+ '\u258c' # 0x00dd -> LEFT HALF BLOCK
+ '\u2590' # 0x00de -> RIGHT HALF BLOCK
+ '\u2580' # 0x00df -> UPPER HALF BLOCK
+ '\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA
+ '\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S
+ '\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA
+ '\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI
+ '\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA
+ '\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA
+ '\xb5' # 0x00e6 -> MICRO SIGN
+ '\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU
+ '\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI
+ '\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA
+ '\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA
+ '\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA
+ '\u221e' # 0x00ec -> INFINITY
+ '\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI
+ '\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON
+ '\u2229' # 0x00ef -> INTERSECTION
+ '\u2261' # 0x00f0 -> IDENTICAL TO
+ '\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ '\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO
+ '\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO
+ '\u2320' # 0x00f4 -> TOP HALF INTEGRAL
+ '\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL
+ '\xf7' # 0x00f6 -> DIVISION SIGN
+ '\u2248' # 0x00f7 -> ALMOST EQUAL TO
+ '\xb0' # 0x00f8 -> DEGREE SIGN
+ '\u2219' # 0x00f9 -> BULLET OPERATOR
+ '\xb7' # 0x00fa -> MIDDLE DOT
+ '\u221a' # 0x00fb -> SQUARE ROOT
+ '\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+ '\xb2' # 0x00fd -> SUPERSCRIPT TWO
+ '\u25a0' # 0x00fe -> BLACK SQUARE
+ '\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00a4: 0x00af, # CURRENCY SIGN
+ 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x00aa, # NOT SIGN
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x00fd, # SUPERSCRIPT TWO
+ 0x00b5: 0x00e6, # MICRO SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR
+ 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x00bf: 0x00a8, # INVERTED QUESTION MARK
+ 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS
+ 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE
+ 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE
+ 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA
+ 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE
+ 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE
+ 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS
+ 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE
+ 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS
+ 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S
+ 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE
+ 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE
+ 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX
+ 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS
+ 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE
+ 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE
+ 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA
+ 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE
+ 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE
+ 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX
+ 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS
+ 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE
+ 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE
+ 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX
+ 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS
+ 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE
+ 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE
+ 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE
+ 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX
+ 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS
+ 0x00f7: 0x00f6, # DIVISION SIGN
+ 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE
+ 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE
+ 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE
+ 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX
+ 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS
+ 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS
+ 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK
+ 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA
+ 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA
+ 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA
+ 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI
+ 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA
+ 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA
+ 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA
+ 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON
+ 0x03c0: 0x00e3, # GREEK SMALL LETTER PI
+ 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA
+ 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU
+ 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI
+ 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N
+ 0x20a7: 0x009e, # PESETA SIGN
+ 0x2219: 0x00f9, # BULLET OPERATOR
+ 0x221a: 0x00fb, # SQUARE ROOT
+ 0x221e: 0x00ec, # INFINITY
+ 0x2229: 0x00ef, # INTERSECTION
+ 0x2248: 0x00f7, # ALMOST EQUAL TO
+ 0x2261: 0x00f0, # IDENTICAL TO
+ 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO
+ 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO
+ 0x2310: 0x00a9, # REVERSED NOT SIGN
+ 0x2320: 0x00f4, # TOP HALF INTEGRAL
+ 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x258c: 0x00dd, # LEFT HALF BLOCK
+ 0x2590: 0x00de, # RIGHT HALF BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/dist/lib/encodings/cp866.py b/dist/lib/encodings/cp866.py
new file mode 100644
index 0000000..bec7ae3
--- /dev/null
+++ b/dist/lib/encodings/cp866.py
@@ -0,0 +1,698 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP866.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp866',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A
+ 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE
+ 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE
+ 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE
+ 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE
+ 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE
+ 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE
+ 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE
+ 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I
+ 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I
+ 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA
+ 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL
+ 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM
+ 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN
+ 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O
+ 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE
+ 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER
+ 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES
+ 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE
+ 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U
+ 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF
+ 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA
+ 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE
+ 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE
+ 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA
+ 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA
+ 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN
+ 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU
+ 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN
+ 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E
+ 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU
+ 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA
+ 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A
+ 0x00a1: 0x0431, # CYRILLIC SMALL LETTER BE
+ 0x00a2: 0x0432, # CYRILLIC SMALL LETTER VE
+ 0x00a3: 0x0433, # CYRILLIC SMALL LETTER GHE
+ 0x00a4: 0x0434, # CYRILLIC SMALL LETTER DE
+ 0x00a5: 0x0435, # CYRILLIC SMALL LETTER IE
+ 0x00a6: 0x0436, # CYRILLIC SMALL LETTER ZHE
+ 0x00a7: 0x0437, # CYRILLIC SMALL LETTER ZE
+ 0x00a8: 0x0438, # CYRILLIC SMALL LETTER I
+ 0x00a9: 0x0439, # CYRILLIC SMALL LETTER SHORT I
+ 0x00aa: 0x043a, # CYRILLIC SMALL LETTER KA
+ 0x00ab: 0x043b, # CYRILLIC SMALL LETTER EL
+ 0x00ac: 0x043c, # CYRILLIC SMALL LETTER EM
+ 0x00ad: 0x043d, # CYRILLIC SMALL LETTER EN
+ 0x00ae: 0x043e, # CYRILLIC SMALL LETTER O
+ 0x00af: 0x043f, # CYRILLIC SMALL LETTER PE
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x258c, # LEFT HALF BLOCK
+ 0x00de: 0x2590, # RIGHT HALF BLOCK
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER
+ 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES
+ 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE
+ 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U
+ 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF
+ 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA
+ 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE
+ 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE
+ 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA
+ 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA
+ 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN
+ 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU
+ 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN
+ 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E
+ 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU
+ 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA
+ 0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO
+ 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO
+ 0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ 0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
+ 0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI
+ 0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI
+ 0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U
+ 0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x2219, # BULLET OPERATOR
+ 0x00fa: 0x00b7, # MIDDLE DOT
+ 0x00fb: 0x221a, # SQUARE ROOT
+ 0x00fc: 0x2116, # NUMERO SIGN
+ 0x00fd: 0x00a4, # CURRENCY SIGN
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x0000 -> NULL
+ '\x01' # 0x0001 -> START OF HEADING
+ '\x02' # 0x0002 -> START OF TEXT
+ '\x03' # 0x0003 -> END OF TEXT
+ '\x04' # 0x0004 -> END OF TRANSMISSION
+ '\x05' # 0x0005 -> ENQUIRY
+ '\x06' # 0x0006 -> ACKNOWLEDGE
+ '\x07' # 0x0007 -> BELL
+ '\x08' # 0x0008 -> BACKSPACE
+ '\t' # 0x0009 -> HORIZONTAL TABULATION
+ '\n' # 0x000a -> LINE FEED
+ '\x0b' # 0x000b -> VERTICAL TABULATION
+ '\x0c' # 0x000c -> FORM FEED
+ '\r' # 0x000d -> CARRIAGE RETURN
+ '\x0e' # 0x000e -> SHIFT OUT
+ '\x0f' # 0x000f -> SHIFT IN
+ '\x10' # 0x0010 -> DATA LINK ESCAPE
+ '\x11' # 0x0011 -> DEVICE CONTROL ONE
+ '\x12' # 0x0012 -> DEVICE CONTROL TWO
+ '\x13' # 0x0013 -> DEVICE CONTROL THREE
+ '\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ '\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ '\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x0018 -> CANCEL
+ '\x19' # 0x0019 -> END OF MEDIUM
+ '\x1a' # 0x001a -> SUBSTITUTE
+ '\x1b' # 0x001b -> ESCAPE
+ '\x1c' # 0x001c -> FILE SEPARATOR
+ '\x1d' # 0x001d -> GROUP SEPARATOR
+ '\x1e' # 0x001e -> RECORD SEPARATOR
+ '\x1f' # 0x001f -> UNIT SEPARATOR
+ ' ' # 0x0020 -> SPACE
+ '!' # 0x0021 -> EXCLAMATION MARK
+ '"' # 0x0022 -> QUOTATION MARK
+ '#' # 0x0023 -> NUMBER SIGN
+ '$' # 0x0024 -> DOLLAR SIGN
+ '%' # 0x0025 -> PERCENT SIGN
+ '&' # 0x0026 -> AMPERSAND
+ "'" # 0x0027 -> APOSTROPHE
+ '(' # 0x0028 -> LEFT PARENTHESIS
+ ')' # 0x0029 -> RIGHT PARENTHESIS
+ '*' # 0x002a -> ASTERISK
+ '+' # 0x002b -> PLUS SIGN
+ ',' # 0x002c -> COMMA
+ '-' # 0x002d -> HYPHEN-MINUS
+ '.' # 0x002e -> FULL STOP
+ '/' # 0x002f -> SOLIDUS
+ '0' # 0x0030 -> DIGIT ZERO
+ '1' # 0x0031 -> DIGIT ONE
+ '2' # 0x0032 -> DIGIT TWO
+ '3' # 0x0033 -> DIGIT THREE
+ '4' # 0x0034 -> DIGIT FOUR
+ '5' # 0x0035 -> DIGIT FIVE
+ '6' # 0x0036 -> DIGIT SIX
+ '7' # 0x0037 -> DIGIT SEVEN
+ '8' # 0x0038 -> DIGIT EIGHT
+ '9' # 0x0039 -> DIGIT NINE
+ ':' # 0x003a -> COLON
+ ';' # 0x003b -> SEMICOLON
+ '<' # 0x003c -> LESS-THAN SIGN
+ '=' # 0x003d -> EQUALS SIGN
+ '>' # 0x003e -> GREATER-THAN SIGN
+ '?' # 0x003f -> QUESTION MARK
+ '@' # 0x0040 -> COMMERCIAL AT
+ 'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ 'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ 'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ 'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ 'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ 'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ 'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ 'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ 'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ 'J' # 0x004a -> LATIN CAPITAL LETTER J
+ 'K' # 0x004b -> LATIN CAPITAL LETTER K
+ 'L' # 0x004c -> LATIN CAPITAL LETTER L
+ 'M' # 0x004d -> LATIN CAPITAL LETTER M
+ 'N' # 0x004e -> LATIN CAPITAL LETTER N
+ 'O' # 0x004f -> LATIN CAPITAL LETTER O
+ 'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ 'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ 'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ 'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ 'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ 'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ 'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ '[' # 0x005b -> LEFT SQUARE BRACKET
+ '\\' # 0x005c -> REVERSE SOLIDUS
+ ']' # 0x005d -> RIGHT SQUARE BRACKET
+ '^' # 0x005e -> CIRCUMFLEX ACCENT
+ '_' # 0x005f -> LOW LINE
+ '`' # 0x0060 -> GRAVE ACCENT
+ 'a' # 0x0061 -> LATIN SMALL LETTER A
+ 'b' # 0x0062 -> LATIN SMALL LETTER B
+ 'c' # 0x0063 -> LATIN SMALL LETTER C
+ 'd' # 0x0064 -> LATIN SMALL LETTER D
+ 'e' # 0x0065 -> LATIN SMALL LETTER E
+ 'f' # 0x0066 -> LATIN SMALL LETTER F
+ 'g' # 0x0067 -> LATIN SMALL LETTER G
+ 'h' # 0x0068 -> LATIN SMALL LETTER H
+ 'i' # 0x0069 -> LATIN SMALL LETTER I
+ 'j' # 0x006a -> LATIN SMALL LETTER J
+ 'k' # 0x006b -> LATIN SMALL LETTER K
+ 'l' # 0x006c -> LATIN SMALL LETTER L
+ 'm' # 0x006d -> LATIN SMALL LETTER M
+ 'n' # 0x006e -> LATIN SMALL LETTER N
+ 'o' # 0x006f -> LATIN SMALL LETTER O
+ 'p' # 0x0070 -> LATIN SMALL LETTER P
+ 'q' # 0x0071 -> LATIN SMALL LETTER Q
+ 'r' # 0x0072 -> LATIN SMALL LETTER R
+ 's' # 0x0073 -> LATIN SMALL LETTER S
+ 't' # 0x0074 -> LATIN SMALL LETTER T
+ 'u' # 0x0075 -> LATIN SMALL LETTER U
+ 'v' # 0x0076 -> LATIN SMALL LETTER V
+ 'w' # 0x0077 -> LATIN SMALL LETTER W
+ 'x' # 0x0078 -> LATIN SMALL LETTER X
+ 'y' # 0x0079 -> LATIN SMALL LETTER Y
+ 'z' # 0x007a -> LATIN SMALL LETTER Z
+ '{' # 0x007b -> LEFT CURLY BRACKET
+ '|' # 0x007c -> VERTICAL LINE
+ '}' # 0x007d -> RIGHT CURLY BRACKET
+ '~' # 0x007e -> TILDE
+ '\x7f' # 0x007f -> DELETE
+ '\u0410' # 0x0080 -> CYRILLIC CAPITAL LETTER A
+ '\u0411' # 0x0081 -> CYRILLIC CAPITAL LETTER BE
+ '\u0412' # 0x0082 -> CYRILLIC CAPITAL LETTER VE
+ '\u0413' # 0x0083 -> CYRILLIC CAPITAL LETTER GHE
+ '\u0414' # 0x0084 -> CYRILLIC CAPITAL LETTER DE
+ '\u0415' # 0x0085 -> CYRILLIC CAPITAL LETTER IE
+ '\u0416' # 0x0086 -> CYRILLIC CAPITAL LETTER ZHE
+ '\u0417' # 0x0087 -> CYRILLIC CAPITAL LETTER ZE
+ '\u0418' # 0x0088 -> CYRILLIC CAPITAL LETTER I
+ '\u0419' # 0x0089 -> CYRILLIC CAPITAL LETTER SHORT I
+ '\u041a' # 0x008a -> CYRILLIC CAPITAL LETTER KA
+ '\u041b' # 0x008b -> CYRILLIC CAPITAL LETTER EL
+ '\u041c' # 0x008c -> CYRILLIC CAPITAL LETTER EM
+ '\u041d' # 0x008d -> CYRILLIC CAPITAL LETTER EN
+ '\u041e' # 0x008e -> CYRILLIC CAPITAL LETTER O
+ '\u041f' # 0x008f -> CYRILLIC CAPITAL LETTER PE
+ '\u0420' # 0x0090 -> CYRILLIC CAPITAL LETTER ER
+ '\u0421' # 0x0091 -> CYRILLIC CAPITAL LETTER ES
+ '\u0422' # 0x0092 -> CYRILLIC CAPITAL LETTER TE
+ '\u0423' # 0x0093 -> CYRILLIC CAPITAL LETTER U
+ '\u0424' # 0x0094 -> CYRILLIC CAPITAL LETTER EF
+ '\u0425' # 0x0095 -> CYRILLIC CAPITAL LETTER HA
+ '\u0426' # 0x0096 -> CYRILLIC CAPITAL LETTER TSE
+ '\u0427' # 0x0097 -> CYRILLIC CAPITAL LETTER CHE
+ '\u0428' # 0x0098 -> CYRILLIC CAPITAL LETTER SHA
+ '\u0429' # 0x0099 -> CYRILLIC CAPITAL LETTER SHCHA
+ '\u042a' # 0x009a -> CYRILLIC CAPITAL LETTER HARD SIGN
+ '\u042b' # 0x009b -> CYRILLIC CAPITAL LETTER YERU
+ '\u042c' # 0x009c -> CYRILLIC CAPITAL LETTER SOFT SIGN
+ '\u042d' # 0x009d -> CYRILLIC CAPITAL LETTER E
+ '\u042e' # 0x009e -> CYRILLIC CAPITAL LETTER YU
+ '\u042f' # 0x009f -> CYRILLIC CAPITAL LETTER YA
+ '\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A
+ '\u0431' # 0x00a1 -> CYRILLIC SMALL LETTER BE
+ '\u0432' # 0x00a2 -> CYRILLIC SMALL LETTER VE
+ '\u0433' # 0x00a3 -> CYRILLIC SMALL LETTER GHE
+ '\u0434' # 0x00a4 -> CYRILLIC SMALL LETTER DE
+ '\u0435' # 0x00a5 -> CYRILLIC SMALL LETTER IE
+ '\u0436' # 0x00a6 -> CYRILLIC SMALL LETTER ZHE
+ '\u0437' # 0x00a7 -> CYRILLIC SMALL LETTER ZE
+ '\u0438' # 0x00a8 -> CYRILLIC SMALL LETTER I
+ '\u0439' # 0x00a9 -> CYRILLIC SMALL LETTER SHORT I
+ '\u043a' # 0x00aa -> CYRILLIC SMALL LETTER KA
+ '\u043b' # 0x00ab -> CYRILLIC SMALL LETTER EL
+ '\u043c' # 0x00ac -> CYRILLIC SMALL LETTER EM
+ '\u043d' # 0x00ad -> CYRILLIC SMALL LETTER EN
+ '\u043e' # 0x00ae -> CYRILLIC SMALL LETTER O
+ '\u043f' # 0x00af -> CYRILLIC SMALL LETTER PE
+ '\u2591' # 0x00b0 -> LIGHT SHADE
+ '\u2592' # 0x00b1 -> MEDIUM SHADE
+ '\u2593' # 0x00b2 -> DARK SHADE
+ '\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ '\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ '\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ '\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ '\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ '\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ '\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ '\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ '\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ '\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ '\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ '\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ '\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ '\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ '\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ '\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ '\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ '\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ '\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ '\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ '\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ '\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ '\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ '\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ '\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ '\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ '\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ '\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ '\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ '\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ '\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ '\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ '\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ '\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ '\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ '\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ '\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ '\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ '\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ '\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ '\u2588' # 0x00db -> FULL BLOCK
+ '\u2584' # 0x00dc -> LOWER HALF BLOCK
+ '\u258c' # 0x00dd -> LEFT HALF BLOCK
+ '\u2590' # 0x00de -> RIGHT HALF BLOCK
+ '\u2580' # 0x00df -> UPPER HALF BLOCK
+ '\u0440' # 0x00e0 -> CYRILLIC SMALL LETTER ER
+ '\u0441' # 0x00e1 -> CYRILLIC SMALL LETTER ES
+ '\u0442' # 0x00e2 -> CYRILLIC SMALL LETTER TE
+ '\u0443' # 0x00e3 -> CYRILLIC SMALL LETTER U
+ '\u0444' # 0x00e4 -> CYRILLIC SMALL LETTER EF
+ '\u0445' # 0x00e5 -> CYRILLIC SMALL LETTER HA
+ '\u0446' # 0x00e6 -> CYRILLIC SMALL LETTER TSE
+ '\u0447' # 0x00e7 -> CYRILLIC SMALL LETTER CHE
+ '\u0448' # 0x00e8 -> CYRILLIC SMALL LETTER SHA
+ '\u0449' # 0x00e9 -> CYRILLIC SMALL LETTER SHCHA
+ '\u044a' # 0x00ea -> CYRILLIC SMALL LETTER HARD SIGN
+ '\u044b' # 0x00eb -> CYRILLIC SMALL LETTER YERU
+ '\u044c' # 0x00ec -> CYRILLIC SMALL LETTER SOFT SIGN
+ '\u044d' # 0x00ed -> CYRILLIC SMALL LETTER E
+ '\u044e' # 0x00ee -> CYRILLIC SMALL LETTER YU
+ '\u044f' # 0x00ef -> CYRILLIC SMALL LETTER YA
+ '\u0401' # 0x00f0 -> CYRILLIC CAPITAL LETTER IO
+ '\u0451' # 0x00f1 -> CYRILLIC SMALL LETTER IO
+ '\u0404' # 0x00f2 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ '\u0454' # 0x00f3 -> CYRILLIC SMALL LETTER UKRAINIAN IE
+ '\u0407' # 0x00f4 -> CYRILLIC CAPITAL LETTER YI
+ '\u0457' # 0x00f5 -> CYRILLIC SMALL LETTER YI
+ '\u040e' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHORT U
+ '\u045e' # 0x00f7 -> CYRILLIC SMALL LETTER SHORT U
+ '\xb0' # 0x00f8 -> DEGREE SIGN
+ '\u2219' # 0x00f9 -> BULLET OPERATOR
+ '\xb7' # 0x00fa -> MIDDLE DOT
+ '\u221a' # 0x00fb -> SQUARE ROOT
+ '\u2116' # 0x00fc -> NUMERO SIGN
+ '\xa4' # 0x00fd -> CURRENCY SIGN
+ '\u25a0' # 0x00fe -> BLACK SQUARE
+ '\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a4: 0x00fd, # CURRENCY SIGN
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b7: 0x00fa, # MIDDLE DOT
+ 0x0401: 0x00f0, # CYRILLIC CAPITAL LETTER IO
+ 0x0404: 0x00f2, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ 0x0407: 0x00f4, # CYRILLIC CAPITAL LETTER YI
+ 0x040e: 0x00f6, # CYRILLIC CAPITAL LETTER SHORT U
+ 0x0410: 0x0080, # CYRILLIC CAPITAL LETTER A
+ 0x0411: 0x0081, # CYRILLIC CAPITAL LETTER BE
+ 0x0412: 0x0082, # CYRILLIC CAPITAL LETTER VE
+ 0x0413: 0x0083, # CYRILLIC CAPITAL LETTER GHE
+ 0x0414: 0x0084, # CYRILLIC CAPITAL LETTER DE
+ 0x0415: 0x0085, # CYRILLIC CAPITAL LETTER IE
+ 0x0416: 0x0086, # CYRILLIC CAPITAL LETTER ZHE
+ 0x0417: 0x0087, # CYRILLIC CAPITAL LETTER ZE
+ 0x0418: 0x0088, # CYRILLIC CAPITAL LETTER I
+ 0x0419: 0x0089, # CYRILLIC CAPITAL LETTER SHORT I
+ 0x041a: 0x008a, # CYRILLIC CAPITAL LETTER KA
+ 0x041b: 0x008b, # CYRILLIC CAPITAL LETTER EL
+ 0x041c: 0x008c, # CYRILLIC CAPITAL LETTER EM
+ 0x041d: 0x008d, # CYRILLIC CAPITAL LETTER EN
+ 0x041e: 0x008e, # CYRILLIC CAPITAL LETTER O
+ 0x041f: 0x008f, # CYRILLIC CAPITAL LETTER PE
+ 0x0420: 0x0090, # CYRILLIC CAPITAL LETTER ER
+ 0x0421: 0x0091, # CYRILLIC CAPITAL LETTER ES
+ 0x0422: 0x0092, # CYRILLIC CAPITAL LETTER TE
+ 0x0423: 0x0093, # CYRILLIC CAPITAL LETTER U
+ 0x0424: 0x0094, # CYRILLIC CAPITAL LETTER EF
+ 0x0425: 0x0095, # CYRILLIC CAPITAL LETTER HA
+ 0x0426: 0x0096, # CYRILLIC CAPITAL LETTER TSE
+ 0x0427: 0x0097, # CYRILLIC CAPITAL LETTER CHE
+ 0x0428: 0x0098, # CYRILLIC CAPITAL LETTER SHA
+ 0x0429: 0x0099, # CYRILLIC CAPITAL LETTER SHCHA
+ 0x042a: 0x009a, # CYRILLIC CAPITAL LETTER HARD SIGN
+ 0x042b: 0x009b, # CYRILLIC CAPITAL LETTER YERU
+ 0x042c: 0x009c, # CYRILLIC CAPITAL LETTER SOFT SIGN
+ 0x042d: 0x009d, # CYRILLIC CAPITAL LETTER E
+ 0x042e: 0x009e, # CYRILLIC CAPITAL LETTER YU
+ 0x042f: 0x009f, # CYRILLIC CAPITAL LETTER YA
+ 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A
+ 0x0431: 0x00a1, # CYRILLIC SMALL LETTER BE
+ 0x0432: 0x00a2, # CYRILLIC SMALL LETTER VE
+ 0x0433: 0x00a3, # CYRILLIC SMALL LETTER GHE
+ 0x0434: 0x00a4, # CYRILLIC SMALL LETTER DE
+ 0x0435: 0x00a5, # CYRILLIC SMALL LETTER IE
+ 0x0436: 0x00a6, # CYRILLIC SMALL LETTER ZHE
+ 0x0437: 0x00a7, # CYRILLIC SMALL LETTER ZE
+ 0x0438: 0x00a8, # CYRILLIC SMALL LETTER I
+ 0x0439: 0x00a9, # CYRILLIC SMALL LETTER SHORT I
+ 0x043a: 0x00aa, # CYRILLIC SMALL LETTER KA
+ 0x043b: 0x00ab, # CYRILLIC SMALL LETTER EL
+ 0x043c: 0x00ac, # CYRILLIC SMALL LETTER EM
+ 0x043d: 0x00ad, # CYRILLIC SMALL LETTER EN
+ 0x043e: 0x00ae, # CYRILLIC SMALL LETTER O
+ 0x043f: 0x00af, # CYRILLIC SMALL LETTER PE
+ 0x0440: 0x00e0, # CYRILLIC SMALL LETTER ER
+ 0x0441: 0x00e1, # CYRILLIC SMALL LETTER ES
+ 0x0442: 0x00e2, # CYRILLIC SMALL LETTER TE
+ 0x0443: 0x00e3, # CYRILLIC SMALL LETTER U
+ 0x0444: 0x00e4, # CYRILLIC SMALL LETTER EF
+ 0x0445: 0x00e5, # CYRILLIC SMALL LETTER HA
+ 0x0446: 0x00e6, # CYRILLIC SMALL LETTER TSE
+ 0x0447: 0x00e7, # CYRILLIC SMALL LETTER CHE
+ 0x0448: 0x00e8, # CYRILLIC SMALL LETTER SHA
+ 0x0449: 0x00e9, # CYRILLIC SMALL LETTER SHCHA
+ 0x044a: 0x00ea, # CYRILLIC SMALL LETTER HARD SIGN
+ 0x044b: 0x00eb, # CYRILLIC SMALL LETTER YERU
+ 0x044c: 0x00ec, # CYRILLIC SMALL LETTER SOFT SIGN
+ 0x044d: 0x00ed, # CYRILLIC SMALL LETTER E
+ 0x044e: 0x00ee, # CYRILLIC SMALL LETTER YU
+ 0x044f: 0x00ef, # CYRILLIC SMALL LETTER YA
+ 0x0451: 0x00f1, # CYRILLIC SMALL LETTER IO
+ 0x0454: 0x00f3, # CYRILLIC SMALL LETTER UKRAINIAN IE
+ 0x0457: 0x00f5, # CYRILLIC SMALL LETTER YI
+ 0x045e: 0x00f7, # CYRILLIC SMALL LETTER SHORT U
+ 0x2116: 0x00fc, # NUMERO SIGN
+ 0x2219: 0x00f9, # BULLET OPERATOR
+ 0x221a: 0x00fb, # SQUARE ROOT
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+ 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+ 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+ 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+ 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+ 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+ 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+ 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+ 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+ 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x258c: 0x00dd, # LEFT HALF BLOCK
+ 0x2590: 0x00de, # RIGHT HALF BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/dist/lib/encodings/cp869.py b/dist/lib/encodings/cp869.py
new file mode 100644
index 0000000..8d8a29b
--- /dev/null
+++ b/dist/lib/encodings/cp869.py
@@ -0,0 +1,689 @@
+""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP869.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_map)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_map)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp869',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+### Decoding Map
+
+decoding_map = codecs.make_identity_dict(range(256))
+decoding_map.update({
+ 0x0080: None, # UNDEFINED
+ 0x0081: None, # UNDEFINED
+ 0x0082: None, # UNDEFINED
+ 0x0083: None, # UNDEFINED
+ 0x0084: None, # UNDEFINED
+ 0x0085: None, # UNDEFINED
+ 0x0086: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS
+ 0x0087: None, # UNDEFINED
+ 0x0088: 0x00b7, # MIDDLE DOT
+ 0x0089: 0x00ac, # NOT SIGN
+ 0x008a: 0x00a6, # BROKEN BAR
+ 0x008b: 0x2018, # LEFT SINGLE QUOTATION MARK
+ 0x008c: 0x2019, # RIGHT SINGLE QUOTATION MARK
+ 0x008d: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS
+ 0x008e: 0x2015, # HORIZONTAL BAR
+ 0x008f: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS
+ 0x0090: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS
+ 0x0091: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+ 0x0092: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS
+ 0x0093: None, # UNDEFINED
+ 0x0094: None, # UNDEFINED
+ 0x0095: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS
+ 0x0096: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+ 0x0097: 0x00a9, # COPYRIGHT SIGN
+ 0x0098: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS
+ 0x0099: 0x00b2, # SUPERSCRIPT TWO
+ 0x009a: 0x00b3, # SUPERSCRIPT THREE
+ 0x009b: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS
+ 0x009c: 0x00a3, # POUND SIGN
+ 0x009d: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS
+ 0x009e: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS
+ 0x009f: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS
+ 0x00a0: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
+ 0x00a1: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+ 0x00a2: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS
+ 0x00a3: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS
+ 0x00a4: 0x0391, # GREEK CAPITAL LETTER ALPHA
+ 0x00a5: 0x0392, # GREEK CAPITAL LETTER BETA
+ 0x00a6: 0x0393, # GREEK CAPITAL LETTER GAMMA
+ 0x00a7: 0x0394, # GREEK CAPITAL LETTER DELTA
+ 0x00a8: 0x0395, # GREEK CAPITAL LETTER EPSILON
+ 0x00a9: 0x0396, # GREEK CAPITAL LETTER ZETA
+ 0x00aa: 0x0397, # GREEK CAPITAL LETTER ETA
+ 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF
+ 0x00ac: 0x0398, # GREEK CAPITAL LETTER THETA
+ 0x00ad: 0x0399, # GREEK CAPITAL LETTER IOTA
+ 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00b0: 0x2591, # LIGHT SHADE
+ 0x00b1: 0x2592, # MEDIUM SHADE
+ 0x00b2: 0x2593, # DARK SHADE
+ 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL
+ 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x00b5: 0x039a, # GREEK CAPITAL LETTER KAPPA
+ 0x00b6: 0x039b, # GREEK CAPITAL LETTER LAMDA
+ 0x00b7: 0x039c, # GREEK CAPITAL LETTER MU
+ 0x00b8: 0x039d, # GREEK CAPITAL LETTER NU
+ 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x00bd: 0x039e, # GREEK CAPITAL LETTER XI
+ 0x00be: 0x039f, # GREEK CAPITAL LETTER OMICRON
+ 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x00c6: 0x03a0, # GREEK CAPITAL LETTER PI
+ 0x00c7: 0x03a1, # GREEK CAPITAL LETTER RHO
+ 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x00cf: 0x03a3, # GREEK CAPITAL LETTER SIGMA
+ 0x00d0: 0x03a4, # GREEK CAPITAL LETTER TAU
+ 0x00d1: 0x03a5, # GREEK CAPITAL LETTER UPSILON
+ 0x00d2: 0x03a6, # GREEK CAPITAL LETTER PHI
+ 0x00d3: 0x03a7, # GREEK CAPITAL LETTER CHI
+ 0x00d4: 0x03a8, # GREEK CAPITAL LETTER PSI
+ 0x00d5: 0x03a9, # GREEK CAPITAL LETTER OMEGA
+ 0x00d6: 0x03b1, # GREEK SMALL LETTER ALPHA
+ 0x00d7: 0x03b2, # GREEK SMALL LETTER BETA
+ 0x00d8: 0x03b3, # GREEK SMALL LETTER GAMMA
+ 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x00db: 0x2588, # FULL BLOCK
+ 0x00dc: 0x2584, # LOWER HALF BLOCK
+ 0x00dd: 0x03b4, # GREEK SMALL LETTER DELTA
+ 0x00de: 0x03b5, # GREEK SMALL LETTER EPSILON
+ 0x00df: 0x2580, # UPPER HALF BLOCK
+ 0x00e0: 0x03b6, # GREEK SMALL LETTER ZETA
+ 0x00e1: 0x03b7, # GREEK SMALL LETTER ETA
+ 0x00e2: 0x03b8, # GREEK SMALL LETTER THETA
+ 0x00e3: 0x03b9, # GREEK SMALL LETTER IOTA
+ 0x00e4: 0x03ba, # GREEK SMALL LETTER KAPPA
+ 0x00e5: 0x03bb, # GREEK SMALL LETTER LAMDA
+ 0x00e6: 0x03bc, # GREEK SMALL LETTER MU
+ 0x00e7: 0x03bd, # GREEK SMALL LETTER NU
+ 0x00e8: 0x03be, # GREEK SMALL LETTER XI
+ 0x00e9: 0x03bf, # GREEK SMALL LETTER OMICRON
+ 0x00ea: 0x03c0, # GREEK SMALL LETTER PI
+ 0x00eb: 0x03c1, # GREEK SMALL LETTER RHO
+ 0x00ec: 0x03c3, # GREEK SMALL LETTER SIGMA
+ 0x00ed: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA
+ 0x00ee: 0x03c4, # GREEK SMALL LETTER TAU
+ 0x00ef: 0x0384, # GREEK TONOS
+ 0x00f0: 0x00ad, # SOFT HYPHEN
+ 0x00f1: 0x00b1, # PLUS-MINUS SIGN
+ 0x00f2: 0x03c5, # GREEK SMALL LETTER UPSILON
+ 0x00f3: 0x03c6, # GREEK SMALL LETTER PHI
+ 0x00f4: 0x03c7, # GREEK SMALL LETTER CHI
+ 0x00f5: 0x00a7, # SECTION SIGN
+ 0x00f6: 0x03c8, # GREEK SMALL LETTER PSI
+ 0x00f7: 0x0385, # GREEK DIALYTIKA TONOS
+ 0x00f8: 0x00b0, # DEGREE SIGN
+ 0x00f9: 0x00a8, # DIAERESIS
+ 0x00fa: 0x03c9, # GREEK SMALL LETTER OMEGA
+ 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+ 0x00fc: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+ 0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS
+ 0x00fe: 0x25a0, # BLACK SQUARE
+ 0x00ff: 0x00a0, # NO-BREAK SPACE
+})
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x0000 -> NULL
+ '\x01' # 0x0001 -> START OF HEADING
+ '\x02' # 0x0002 -> START OF TEXT
+ '\x03' # 0x0003 -> END OF TEXT
+ '\x04' # 0x0004 -> END OF TRANSMISSION
+ '\x05' # 0x0005 -> ENQUIRY
+ '\x06' # 0x0006 -> ACKNOWLEDGE
+ '\x07' # 0x0007 -> BELL
+ '\x08' # 0x0008 -> BACKSPACE
+ '\t' # 0x0009 -> HORIZONTAL TABULATION
+ '\n' # 0x000a -> LINE FEED
+ '\x0b' # 0x000b -> VERTICAL TABULATION
+ '\x0c' # 0x000c -> FORM FEED
+ '\r' # 0x000d -> CARRIAGE RETURN
+ '\x0e' # 0x000e -> SHIFT OUT
+ '\x0f' # 0x000f -> SHIFT IN
+ '\x10' # 0x0010 -> DATA LINK ESCAPE
+ '\x11' # 0x0011 -> DEVICE CONTROL ONE
+ '\x12' # 0x0012 -> DEVICE CONTROL TWO
+ '\x13' # 0x0013 -> DEVICE CONTROL THREE
+ '\x14' # 0x0014 -> DEVICE CONTROL FOUR
+ '\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x0016 -> SYNCHRONOUS IDLE
+ '\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x0018 -> CANCEL
+ '\x19' # 0x0019 -> END OF MEDIUM
+ '\x1a' # 0x001a -> SUBSTITUTE
+ '\x1b' # 0x001b -> ESCAPE
+ '\x1c' # 0x001c -> FILE SEPARATOR
+ '\x1d' # 0x001d -> GROUP SEPARATOR
+ '\x1e' # 0x001e -> RECORD SEPARATOR
+ '\x1f' # 0x001f -> UNIT SEPARATOR
+ ' ' # 0x0020 -> SPACE
+ '!' # 0x0021 -> EXCLAMATION MARK
+ '"' # 0x0022 -> QUOTATION MARK
+ '#' # 0x0023 -> NUMBER SIGN
+ '$' # 0x0024 -> DOLLAR SIGN
+ '%' # 0x0025 -> PERCENT SIGN
+ '&' # 0x0026 -> AMPERSAND
+ "'" # 0x0027 -> APOSTROPHE
+ '(' # 0x0028 -> LEFT PARENTHESIS
+ ')' # 0x0029 -> RIGHT PARENTHESIS
+ '*' # 0x002a -> ASTERISK
+ '+' # 0x002b -> PLUS SIGN
+ ',' # 0x002c -> COMMA
+ '-' # 0x002d -> HYPHEN-MINUS
+ '.' # 0x002e -> FULL STOP
+ '/' # 0x002f -> SOLIDUS
+ '0' # 0x0030 -> DIGIT ZERO
+ '1' # 0x0031 -> DIGIT ONE
+ '2' # 0x0032 -> DIGIT TWO
+ '3' # 0x0033 -> DIGIT THREE
+ '4' # 0x0034 -> DIGIT FOUR
+ '5' # 0x0035 -> DIGIT FIVE
+ '6' # 0x0036 -> DIGIT SIX
+ '7' # 0x0037 -> DIGIT SEVEN
+ '8' # 0x0038 -> DIGIT EIGHT
+ '9' # 0x0039 -> DIGIT NINE
+ ':' # 0x003a -> COLON
+ ';' # 0x003b -> SEMICOLON
+ '<' # 0x003c -> LESS-THAN SIGN
+ '=' # 0x003d -> EQUALS SIGN
+ '>' # 0x003e -> GREATER-THAN SIGN
+ '?' # 0x003f -> QUESTION MARK
+ '@' # 0x0040 -> COMMERCIAL AT
+ 'A' # 0x0041 -> LATIN CAPITAL LETTER A
+ 'B' # 0x0042 -> LATIN CAPITAL LETTER B
+ 'C' # 0x0043 -> LATIN CAPITAL LETTER C
+ 'D' # 0x0044 -> LATIN CAPITAL LETTER D
+ 'E' # 0x0045 -> LATIN CAPITAL LETTER E
+ 'F' # 0x0046 -> LATIN CAPITAL LETTER F
+ 'G' # 0x0047 -> LATIN CAPITAL LETTER G
+ 'H' # 0x0048 -> LATIN CAPITAL LETTER H
+ 'I' # 0x0049 -> LATIN CAPITAL LETTER I
+ 'J' # 0x004a -> LATIN CAPITAL LETTER J
+ 'K' # 0x004b -> LATIN CAPITAL LETTER K
+ 'L' # 0x004c -> LATIN CAPITAL LETTER L
+ 'M' # 0x004d -> LATIN CAPITAL LETTER M
+ 'N' # 0x004e -> LATIN CAPITAL LETTER N
+ 'O' # 0x004f -> LATIN CAPITAL LETTER O
+ 'P' # 0x0050 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x0052 -> LATIN CAPITAL LETTER R
+ 'S' # 0x0053 -> LATIN CAPITAL LETTER S
+ 'T' # 0x0054 -> LATIN CAPITAL LETTER T
+ 'U' # 0x0055 -> LATIN CAPITAL LETTER U
+ 'V' # 0x0056 -> LATIN CAPITAL LETTER V
+ 'W' # 0x0057 -> LATIN CAPITAL LETTER W
+ 'X' # 0x0058 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x005a -> LATIN CAPITAL LETTER Z
+ '[' # 0x005b -> LEFT SQUARE BRACKET
+ '\\' # 0x005c -> REVERSE SOLIDUS
+ ']' # 0x005d -> RIGHT SQUARE BRACKET
+ '^' # 0x005e -> CIRCUMFLEX ACCENT
+ '_' # 0x005f -> LOW LINE
+ '`' # 0x0060 -> GRAVE ACCENT
+ 'a' # 0x0061 -> LATIN SMALL LETTER A
+ 'b' # 0x0062 -> LATIN SMALL LETTER B
+ 'c' # 0x0063 -> LATIN SMALL LETTER C
+ 'd' # 0x0064 -> LATIN SMALL LETTER D
+ 'e' # 0x0065 -> LATIN SMALL LETTER E
+ 'f' # 0x0066 -> LATIN SMALL LETTER F
+ 'g' # 0x0067 -> LATIN SMALL LETTER G
+ 'h' # 0x0068 -> LATIN SMALL LETTER H
+ 'i' # 0x0069 -> LATIN SMALL LETTER I
+ 'j' # 0x006a -> LATIN SMALL LETTER J
+ 'k' # 0x006b -> LATIN SMALL LETTER K
+ 'l' # 0x006c -> LATIN SMALL LETTER L
+ 'm' # 0x006d -> LATIN SMALL LETTER M
+ 'n' # 0x006e -> LATIN SMALL LETTER N
+ 'o' # 0x006f -> LATIN SMALL LETTER O
+ 'p' # 0x0070 -> LATIN SMALL LETTER P
+ 'q' # 0x0071 -> LATIN SMALL LETTER Q
+ 'r' # 0x0072 -> LATIN SMALL LETTER R
+ 's' # 0x0073 -> LATIN SMALL LETTER S
+ 't' # 0x0074 -> LATIN SMALL LETTER T
+ 'u' # 0x0075 -> LATIN SMALL LETTER U
+ 'v' # 0x0076 -> LATIN SMALL LETTER V
+ 'w' # 0x0077 -> LATIN SMALL LETTER W
+ 'x' # 0x0078 -> LATIN SMALL LETTER X
+ 'y' # 0x0079 -> LATIN SMALL LETTER Y
+ 'z' # 0x007a -> LATIN SMALL LETTER Z
+ '{' # 0x007b -> LEFT CURLY BRACKET
+ '|' # 0x007c -> VERTICAL LINE
+ '}' # 0x007d -> RIGHT CURLY BRACKET
+ '~' # 0x007e -> TILDE
+ '\x7f' # 0x007f -> DELETE
+ '\ufffe' # 0x0080 -> UNDEFINED
+ '\ufffe' # 0x0081 -> UNDEFINED
+ '\ufffe' # 0x0082 -> UNDEFINED
+ '\ufffe' # 0x0083 -> UNDEFINED
+ '\ufffe' # 0x0084 -> UNDEFINED
+ '\ufffe' # 0x0085 -> UNDEFINED
+ '\u0386' # 0x0086 -> GREEK CAPITAL LETTER ALPHA WITH TONOS
+ '\ufffe' # 0x0087 -> UNDEFINED
+ '\xb7' # 0x0088 -> MIDDLE DOT
+ '\xac' # 0x0089 -> NOT SIGN
+ '\xa6' # 0x008a -> BROKEN BAR
+ '\u2018' # 0x008b -> LEFT SINGLE QUOTATION MARK
+ '\u2019' # 0x008c -> RIGHT SINGLE QUOTATION MARK
+ '\u0388' # 0x008d -> GREEK CAPITAL LETTER EPSILON WITH TONOS
+ '\u2015' # 0x008e -> HORIZONTAL BAR
+ '\u0389' # 0x008f -> GREEK CAPITAL LETTER ETA WITH TONOS
+ '\u038a' # 0x0090 -> GREEK CAPITAL LETTER IOTA WITH TONOS
+ '\u03aa' # 0x0091 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+ '\u038c' # 0x0092 -> GREEK CAPITAL LETTER OMICRON WITH TONOS
+ '\ufffe' # 0x0093 -> UNDEFINED
+ '\ufffe' # 0x0094 -> UNDEFINED
+ '\u038e' # 0x0095 -> GREEK CAPITAL LETTER UPSILON WITH TONOS
+ '\u03ab' # 0x0096 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+ '\xa9' # 0x0097 -> COPYRIGHT SIGN
+ '\u038f' # 0x0098 -> GREEK CAPITAL LETTER OMEGA WITH TONOS
+ '\xb2' # 0x0099 -> SUPERSCRIPT TWO
+ '\xb3' # 0x009a -> SUPERSCRIPT THREE
+ '\u03ac' # 0x009b -> GREEK SMALL LETTER ALPHA WITH TONOS
+ '\xa3' # 0x009c -> POUND SIGN
+ '\u03ad' # 0x009d -> GREEK SMALL LETTER EPSILON WITH TONOS
+ '\u03ae' # 0x009e -> GREEK SMALL LETTER ETA WITH TONOS
+ '\u03af' # 0x009f -> GREEK SMALL LETTER IOTA WITH TONOS
+ '\u03ca' # 0x00a0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
+ '\u0390' # 0x00a1 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+ '\u03cc' # 0x00a2 -> GREEK SMALL LETTER OMICRON WITH TONOS
+ '\u03cd' # 0x00a3 -> GREEK SMALL LETTER UPSILON WITH TONOS
+ '\u0391' # 0x00a4 -> GREEK CAPITAL LETTER ALPHA
+ '\u0392' # 0x00a5 -> GREEK CAPITAL LETTER BETA
+ '\u0393' # 0x00a6 -> GREEK CAPITAL LETTER GAMMA
+ '\u0394' # 0x00a7 -> GREEK CAPITAL LETTER DELTA
+ '\u0395' # 0x00a8 -> GREEK CAPITAL LETTER EPSILON
+ '\u0396' # 0x00a9 -> GREEK CAPITAL LETTER ZETA
+ '\u0397' # 0x00aa -> GREEK CAPITAL LETTER ETA
+ '\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF
+ '\u0398' # 0x00ac -> GREEK CAPITAL LETTER THETA
+ '\u0399' # 0x00ad -> GREEK CAPITAL LETTER IOTA
+ '\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u2591' # 0x00b0 -> LIGHT SHADE
+ '\u2592' # 0x00b1 -> MEDIUM SHADE
+ '\u2593' # 0x00b2 -> DARK SHADE
+ '\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+ '\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ '\u039a' # 0x00b5 -> GREEK CAPITAL LETTER KAPPA
+ '\u039b' # 0x00b6 -> GREEK CAPITAL LETTER LAMDA
+ '\u039c' # 0x00b7 -> GREEK CAPITAL LETTER MU
+ '\u039d' # 0x00b8 -> GREEK CAPITAL LETTER NU
+ '\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ '\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+ '\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+ '\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+ '\u039e' # 0x00bd -> GREEK CAPITAL LETTER XI
+ '\u039f' # 0x00be -> GREEK CAPITAL LETTER OMICRON
+ '\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+ '\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+ '\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ '\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ '\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ '\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+ '\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ '\u03a0' # 0x00c6 -> GREEK CAPITAL LETTER PI
+ '\u03a1' # 0x00c7 -> GREEK CAPITAL LETTER RHO
+ '\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+ '\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ '\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ '\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ '\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ '\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+ '\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ '\u03a3' # 0x00cf -> GREEK CAPITAL LETTER SIGMA
+ '\u03a4' # 0x00d0 -> GREEK CAPITAL LETTER TAU
+ '\u03a5' # 0x00d1 -> GREEK CAPITAL LETTER UPSILON
+ '\u03a6' # 0x00d2 -> GREEK CAPITAL LETTER PHI
+ '\u03a7' # 0x00d3 -> GREEK CAPITAL LETTER CHI
+ '\u03a8' # 0x00d4 -> GREEK CAPITAL LETTER PSI
+ '\u03a9' # 0x00d5 -> GREEK CAPITAL LETTER OMEGA
+ '\u03b1' # 0x00d6 -> GREEK SMALL LETTER ALPHA
+ '\u03b2' # 0x00d7 -> GREEK SMALL LETTER BETA
+ '\u03b3' # 0x00d8 -> GREEK SMALL LETTER GAMMA
+ '\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+ '\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+ '\u2588' # 0x00db -> FULL BLOCK
+ '\u2584' # 0x00dc -> LOWER HALF BLOCK
+ '\u03b4' # 0x00dd -> GREEK SMALL LETTER DELTA
+ '\u03b5' # 0x00de -> GREEK SMALL LETTER EPSILON
+ '\u2580' # 0x00df -> UPPER HALF BLOCK
+ '\u03b6' # 0x00e0 -> GREEK SMALL LETTER ZETA
+ '\u03b7' # 0x00e1 -> GREEK SMALL LETTER ETA
+ '\u03b8' # 0x00e2 -> GREEK SMALL LETTER THETA
+ '\u03b9' # 0x00e3 -> GREEK SMALL LETTER IOTA
+ '\u03ba' # 0x00e4 -> GREEK SMALL LETTER KAPPA
+ '\u03bb' # 0x00e5 -> GREEK SMALL LETTER LAMDA
+ '\u03bc' # 0x00e6 -> GREEK SMALL LETTER MU
+ '\u03bd' # 0x00e7 -> GREEK SMALL LETTER NU
+ '\u03be' # 0x00e8 -> GREEK SMALL LETTER XI
+ '\u03bf' # 0x00e9 -> GREEK SMALL LETTER OMICRON
+ '\u03c0' # 0x00ea -> GREEK SMALL LETTER PI
+ '\u03c1' # 0x00eb -> GREEK SMALL LETTER RHO
+ '\u03c3' # 0x00ec -> GREEK SMALL LETTER SIGMA
+ '\u03c2' # 0x00ed -> GREEK SMALL LETTER FINAL SIGMA
+ '\u03c4' # 0x00ee -> GREEK SMALL LETTER TAU
+ '\u0384' # 0x00ef -> GREEK TONOS
+ '\xad' # 0x00f0 -> SOFT HYPHEN
+ '\xb1' # 0x00f1 -> PLUS-MINUS SIGN
+ '\u03c5' # 0x00f2 -> GREEK SMALL LETTER UPSILON
+ '\u03c6' # 0x00f3 -> GREEK SMALL LETTER PHI
+ '\u03c7' # 0x00f4 -> GREEK SMALL LETTER CHI
+ '\xa7' # 0x00f5 -> SECTION SIGN
+ '\u03c8' # 0x00f6 -> GREEK SMALL LETTER PSI
+ '\u0385' # 0x00f7 -> GREEK DIALYTIKA TONOS
+ '\xb0' # 0x00f8 -> DEGREE SIGN
+ '\xa8' # 0x00f9 -> DIAERESIS
+ '\u03c9' # 0x00fa -> GREEK SMALL LETTER OMEGA
+ '\u03cb' # 0x00fb -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+ '\u03b0' # 0x00fc -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+ '\u03ce' # 0x00fd -> GREEK SMALL LETTER OMEGA WITH TONOS
+ '\u25a0' # 0x00fe -> BLACK SQUARE
+ '\xa0' # 0x00ff -> NO-BREAK SPACE
+)
+
+### Encoding Map
+
+encoding_map = {
+ 0x0000: 0x0000, # NULL
+ 0x0001: 0x0001, # START OF HEADING
+ 0x0002: 0x0002, # START OF TEXT
+ 0x0003: 0x0003, # END OF TEXT
+ 0x0004: 0x0004, # END OF TRANSMISSION
+ 0x0005: 0x0005, # ENQUIRY
+ 0x0006: 0x0006, # ACKNOWLEDGE
+ 0x0007: 0x0007, # BELL
+ 0x0008: 0x0008, # BACKSPACE
+ 0x0009: 0x0009, # HORIZONTAL TABULATION
+ 0x000a: 0x000a, # LINE FEED
+ 0x000b: 0x000b, # VERTICAL TABULATION
+ 0x000c: 0x000c, # FORM FEED
+ 0x000d: 0x000d, # CARRIAGE RETURN
+ 0x000e: 0x000e, # SHIFT OUT
+ 0x000f: 0x000f, # SHIFT IN
+ 0x0010: 0x0010, # DATA LINK ESCAPE
+ 0x0011: 0x0011, # DEVICE CONTROL ONE
+ 0x0012: 0x0012, # DEVICE CONTROL TWO
+ 0x0013: 0x0013, # DEVICE CONTROL THREE
+ 0x0014: 0x0014, # DEVICE CONTROL FOUR
+ 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
+ 0x0016: 0x0016, # SYNCHRONOUS IDLE
+ 0x0017: 0x0017, # END OF TRANSMISSION BLOCK
+ 0x0018: 0x0018, # CANCEL
+ 0x0019: 0x0019, # END OF MEDIUM
+ 0x001a: 0x001a, # SUBSTITUTE
+ 0x001b: 0x001b, # ESCAPE
+ 0x001c: 0x001c, # FILE SEPARATOR
+ 0x001d: 0x001d, # GROUP SEPARATOR
+ 0x001e: 0x001e, # RECORD SEPARATOR
+ 0x001f: 0x001f, # UNIT SEPARATOR
+ 0x0020: 0x0020, # SPACE
+ 0x0021: 0x0021, # EXCLAMATION MARK
+ 0x0022: 0x0022, # QUOTATION MARK
+ 0x0023: 0x0023, # NUMBER SIGN
+ 0x0024: 0x0024, # DOLLAR SIGN
+ 0x0025: 0x0025, # PERCENT SIGN
+ 0x0026: 0x0026, # AMPERSAND
+ 0x0027: 0x0027, # APOSTROPHE
+ 0x0028: 0x0028, # LEFT PARENTHESIS
+ 0x0029: 0x0029, # RIGHT PARENTHESIS
+ 0x002a: 0x002a, # ASTERISK
+ 0x002b: 0x002b, # PLUS SIGN
+ 0x002c: 0x002c, # COMMA
+ 0x002d: 0x002d, # HYPHEN-MINUS
+ 0x002e: 0x002e, # FULL STOP
+ 0x002f: 0x002f, # SOLIDUS
+ 0x0030: 0x0030, # DIGIT ZERO
+ 0x0031: 0x0031, # DIGIT ONE
+ 0x0032: 0x0032, # DIGIT TWO
+ 0x0033: 0x0033, # DIGIT THREE
+ 0x0034: 0x0034, # DIGIT FOUR
+ 0x0035: 0x0035, # DIGIT FIVE
+ 0x0036: 0x0036, # DIGIT SIX
+ 0x0037: 0x0037, # DIGIT SEVEN
+ 0x0038: 0x0038, # DIGIT EIGHT
+ 0x0039: 0x0039, # DIGIT NINE
+ 0x003a: 0x003a, # COLON
+ 0x003b: 0x003b, # SEMICOLON
+ 0x003c: 0x003c, # LESS-THAN SIGN
+ 0x003d: 0x003d, # EQUALS SIGN
+ 0x003e: 0x003e, # GREATER-THAN SIGN
+ 0x003f: 0x003f, # QUESTION MARK
+ 0x0040: 0x0040, # COMMERCIAL AT
+ 0x0041: 0x0041, # LATIN CAPITAL LETTER A
+ 0x0042: 0x0042, # LATIN CAPITAL LETTER B
+ 0x0043: 0x0043, # LATIN CAPITAL LETTER C
+ 0x0044: 0x0044, # LATIN CAPITAL LETTER D
+ 0x0045: 0x0045, # LATIN CAPITAL LETTER E
+ 0x0046: 0x0046, # LATIN CAPITAL LETTER F
+ 0x0047: 0x0047, # LATIN CAPITAL LETTER G
+ 0x0048: 0x0048, # LATIN CAPITAL LETTER H
+ 0x0049: 0x0049, # LATIN CAPITAL LETTER I
+ 0x004a: 0x004a, # LATIN CAPITAL LETTER J
+ 0x004b: 0x004b, # LATIN CAPITAL LETTER K
+ 0x004c: 0x004c, # LATIN CAPITAL LETTER L
+ 0x004d: 0x004d, # LATIN CAPITAL LETTER M
+ 0x004e: 0x004e, # LATIN CAPITAL LETTER N
+ 0x004f: 0x004f, # LATIN CAPITAL LETTER O
+ 0x0050: 0x0050, # LATIN CAPITAL LETTER P
+ 0x0051: 0x0051, # LATIN CAPITAL LETTER Q
+ 0x0052: 0x0052, # LATIN CAPITAL LETTER R
+ 0x0053: 0x0053, # LATIN CAPITAL LETTER S
+ 0x0054: 0x0054, # LATIN CAPITAL LETTER T
+ 0x0055: 0x0055, # LATIN CAPITAL LETTER U
+ 0x0056: 0x0056, # LATIN CAPITAL LETTER V
+ 0x0057: 0x0057, # LATIN CAPITAL LETTER W
+ 0x0058: 0x0058, # LATIN CAPITAL LETTER X
+ 0x0059: 0x0059, # LATIN CAPITAL LETTER Y
+ 0x005a: 0x005a, # LATIN CAPITAL LETTER Z
+ 0x005b: 0x005b, # LEFT SQUARE BRACKET
+ 0x005c: 0x005c, # REVERSE SOLIDUS
+ 0x005d: 0x005d, # RIGHT SQUARE BRACKET
+ 0x005e: 0x005e, # CIRCUMFLEX ACCENT
+ 0x005f: 0x005f, # LOW LINE
+ 0x0060: 0x0060, # GRAVE ACCENT
+ 0x0061: 0x0061, # LATIN SMALL LETTER A
+ 0x0062: 0x0062, # LATIN SMALL LETTER B
+ 0x0063: 0x0063, # LATIN SMALL LETTER C
+ 0x0064: 0x0064, # LATIN SMALL LETTER D
+ 0x0065: 0x0065, # LATIN SMALL LETTER E
+ 0x0066: 0x0066, # LATIN SMALL LETTER F
+ 0x0067: 0x0067, # LATIN SMALL LETTER G
+ 0x0068: 0x0068, # LATIN SMALL LETTER H
+ 0x0069: 0x0069, # LATIN SMALL LETTER I
+ 0x006a: 0x006a, # LATIN SMALL LETTER J
+ 0x006b: 0x006b, # LATIN SMALL LETTER K
+ 0x006c: 0x006c, # LATIN SMALL LETTER L
+ 0x006d: 0x006d, # LATIN SMALL LETTER M
+ 0x006e: 0x006e, # LATIN SMALL LETTER N
+ 0x006f: 0x006f, # LATIN SMALL LETTER O
+ 0x0070: 0x0070, # LATIN SMALL LETTER P
+ 0x0071: 0x0071, # LATIN SMALL LETTER Q
+ 0x0072: 0x0072, # LATIN SMALL LETTER R
+ 0x0073: 0x0073, # LATIN SMALL LETTER S
+ 0x0074: 0x0074, # LATIN SMALL LETTER T
+ 0x0075: 0x0075, # LATIN SMALL LETTER U
+ 0x0076: 0x0076, # LATIN SMALL LETTER V
+ 0x0077: 0x0077, # LATIN SMALL LETTER W
+ 0x0078: 0x0078, # LATIN SMALL LETTER X
+ 0x0079: 0x0079, # LATIN SMALL LETTER Y
+ 0x007a: 0x007a, # LATIN SMALL LETTER Z
+ 0x007b: 0x007b, # LEFT CURLY BRACKET
+ 0x007c: 0x007c, # VERTICAL LINE
+ 0x007d: 0x007d, # RIGHT CURLY BRACKET
+ 0x007e: 0x007e, # TILDE
+ 0x007f: 0x007f, # DELETE
+ 0x00a0: 0x00ff, # NO-BREAK SPACE
+ 0x00a3: 0x009c, # POUND SIGN
+ 0x00a6: 0x008a, # BROKEN BAR
+ 0x00a7: 0x00f5, # SECTION SIGN
+ 0x00a8: 0x00f9, # DIAERESIS
+ 0x00a9: 0x0097, # COPYRIGHT SIGN
+ 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00ac: 0x0089, # NOT SIGN
+ 0x00ad: 0x00f0, # SOFT HYPHEN
+ 0x00b0: 0x00f8, # DEGREE SIGN
+ 0x00b1: 0x00f1, # PLUS-MINUS SIGN
+ 0x00b2: 0x0099, # SUPERSCRIPT TWO
+ 0x00b3: 0x009a, # SUPERSCRIPT THREE
+ 0x00b7: 0x0088, # MIDDLE DOT
+ 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF
+ 0x0384: 0x00ef, # GREEK TONOS
+ 0x0385: 0x00f7, # GREEK DIALYTIKA TONOS
+ 0x0386: 0x0086, # GREEK CAPITAL LETTER ALPHA WITH TONOS
+ 0x0388: 0x008d, # GREEK CAPITAL LETTER EPSILON WITH TONOS
+ 0x0389: 0x008f, # GREEK CAPITAL LETTER ETA WITH TONOS
+ 0x038a: 0x0090, # GREEK CAPITAL LETTER IOTA WITH TONOS
+ 0x038c: 0x0092, # GREEK CAPITAL LETTER OMICRON WITH TONOS
+ 0x038e: 0x0095, # GREEK CAPITAL LETTER UPSILON WITH TONOS
+ 0x038f: 0x0098, # GREEK CAPITAL LETTER OMEGA WITH TONOS
+ 0x0390: 0x00a1, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+ 0x0391: 0x00a4, # GREEK CAPITAL LETTER ALPHA
+ 0x0392: 0x00a5, # GREEK CAPITAL LETTER BETA
+ 0x0393: 0x00a6, # GREEK CAPITAL LETTER GAMMA
+ 0x0394: 0x00a7, # GREEK CAPITAL LETTER DELTA
+ 0x0395: 0x00a8, # GREEK CAPITAL LETTER EPSILON
+ 0x0396: 0x00a9, # GREEK CAPITAL LETTER ZETA
+ 0x0397: 0x00aa, # GREEK CAPITAL LETTER ETA
+ 0x0398: 0x00ac, # GREEK CAPITAL LETTER THETA
+ 0x0399: 0x00ad, # GREEK CAPITAL LETTER IOTA
+ 0x039a: 0x00b5, # GREEK CAPITAL LETTER KAPPA
+ 0x039b: 0x00b6, # GREEK CAPITAL LETTER LAMDA
+ 0x039c: 0x00b7, # GREEK CAPITAL LETTER MU
+ 0x039d: 0x00b8, # GREEK CAPITAL LETTER NU
+ 0x039e: 0x00bd, # GREEK CAPITAL LETTER XI
+ 0x039f: 0x00be, # GREEK CAPITAL LETTER OMICRON
+ 0x03a0: 0x00c6, # GREEK CAPITAL LETTER PI
+ 0x03a1: 0x00c7, # GREEK CAPITAL LETTER RHO
+ 0x03a3: 0x00cf, # GREEK CAPITAL LETTER SIGMA
+ 0x03a4: 0x00d0, # GREEK CAPITAL LETTER TAU
+ 0x03a5: 0x00d1, # GREEK CAPITAL LETTER UPSILON
+ 0x03a6: 0x00d2, # GREEK CAPITAL LETTER PHI
+ 0x03a7: 0x00d3, # GREEK CAPITAL LETTER CHI
+ 0x03a8: 0x00d4, # GREEK CAPITAL LETTER PSI
+ 0x03a9: 0x00d5, # GREEK CAPITAL LETTER OMEGA
+ 0x03aa: 0x0091, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+ 0x03ab: 0x0096, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+ 0x03ac: 0x009b, # GREEK SMALL LETTER ALPHA WITH TONOS
+ 0x03ad: 0x009d, # GREEK SMALL LETTER EPSILON WITH TONOS
+ 0x03ae: 0x009e, # GREEK SMALL LETTER ETA WITH TONOS
+ 0x03af: 0x009f, # GREEK SMALL LETTER IOTA WITH TONOS
+ 0x03b0: 0x00fc, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+ 0x03b1: 0x00d6, # GREEK SMALL LETTER ALPHA
+ 0x03b2: 0x00d7, # GREEK SMALL LETTER BETA
+ 0x03b3: 0x00d8, # GREEK SMALL LETTER GAMMA
+ 0x03b4: 0x00dd, # GREEK SMALL LETTER DELTA
+ 0x03b5: 0x00de, # GREEK SMALL LETTER EPSILON
+ 0x03b6: 0x00e0, # GREEK SMALL LETTER ZETA
+ 0x03b7: 0x00e1, # GREEK SMALL LETTER ETA
+ 0x03b8: 0x00e2, # GREEK SMALL LETTER THETA
+ 0x03b9: 0x00e3, # GREEK SMALL LETTER IOTA
+ 0x03ba: 0x00e4, # GREEK SMALL LETTER KAPPA
+ 0x03bb: 0x00e5, # GREEK SMALL LETTER LAMDA
+ 0x03bc: 0x00e6, # GREEK SMALL LETTER MU
+ 0x03bd: 0x00e7, # GREEK SMALL LETTER NU
+ 0x03be: 0x00e8, # GREEK SMALL LETTER XI
+ 0x03bf: 0x00e9, # GREEK SMALL LETTER OMICRON
+ 0x03c0: 0x00ea, # GREEK SMALL LETTER PI
+ 0x03c1: 0x00eb, # GREEK SMALL LETTER RHO
+ 0x03c2: 0x00ed, # GREEK SMALL LETTER FINAL SIGMA
+ 0x03c3: 0x00ec, # GREEK SMALL LETTER SIGMA
+ 0x03c4: 0x00ee, # GREEK SMALL LETTER TAU
+ 0x03c5: 0x00f2, # GREEK SMALL LETTER UPSILON
+ 0x03c6: 0x00f3, # GREEK SMALL LETTER PHI
+ 0x03c7: 0x00f4, # GREEK SMALL LETTER CHI
+ 0x03c8: 0x00f6, # GREEK SMALL LETTER PSI
+ 0x03c9: 0x00fa, # GREEK SMALL LETTER OMEGA
+ 0x03ca: 0x00a0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA
+ 0x03cb: 0x00fb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+ 0x03cc: 0x00a2, # GREEK SMALL LETTER OMICRON WITH TONOS
+ 0x03cd: 0x00a3, # GREEK SMALL LETTER UPSILON WITH TONOS
+ 0x03ce: 0x00fd, # GREEK SMALL LETTER OMEGA WITH TONOS
+ 0x2015: 0x008e, # HORIZONTAL BAR
+ 0x2018: 0x008b, # LEFT SINGLE QUOTATION MARK
+ 0x2019: 0x008c, # RIGHT SINGLE QUOTATION MARK
+ 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL
+ 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL
+ 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT
+ 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT
+ 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT
+ 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT
+ 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+ 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+ 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+ 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+ 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+ 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL
+ 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL
+ 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+ 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT
+ 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT
+ 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT
+ 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+ 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+ 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+ 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+ 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+ 0x2580: 0x00df, # UPPER HALF BLOCK
+ 0x2584: 0x00dc, # LOWER HALF BLOCK
+ 0x2588: 0x00db, # FULL BLOCK
+ 0x2591: 0x00b0, # LIGHT SHADE
+ 0x2592: 0x00b1, # MEDIUM SHADE
+ 0x2593: 0x00b2, # DARK SHADE
+ 0x25a0: 0x00fe, # BLACK SQUARE
+}
diff --git a/dist/lib/encodings/cp874.py b/dist/lib/encodings/cp874.py
new file mode 100644
index 0000000..59bfcbc
--- /dev/null
+++ b/dist/lib/encodings/cp874.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp874 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP874.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp874',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x04' # 0x04 -> END OF TRANSMISSION
+ '\x05' # 0x05 -> ENQUIRY
+ '\x06' # 0x06 -> ACKNOWLEDGE
+ '\x07' # 0x07 -> BELL
+ '\x08' # 0x08 -> BACKSPACE
+ '\t' # 0x09 -> HORIZONTAL TABULATION
+ '\n' # 0x0A -> LINE FEED
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x14' # 0x14 -> DEVICE CONTROL FOUR
+ '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x16 -> SYNCHRONOUS IDLE
+ '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x1a' # 0x1A -> SUBSTITUTE
+ '\x1b' # 0x1B -> ESCAPE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> DELETE
+ '\u20ac' # 0x80 -> EURO SIGN
+ '\ufffe' # 0x81 -> UNDEFINED
+ '\ufffe' # 0x82 -> UNDEFINED
+ '\ufffe' # 0x83 -> UNDEFINED
+ '\ufffe' # 0x84 -> UNDEFINED
+ '\u2026' # 0x85 -> HORIZONTAL ELLIPSIS
+ '\ufffe' # 0x86 -> UNDEFINED
+ '\ufffe' # 0x87 -> UNDEFINED
+ '\ufffe' # 0x88 -> UNDEFINED
+ '\ufffe' # 0x89 -> UNDEFINED
+ '\ufffe' # 0x8A -> UNDEFINED
+ '\ufffe' # 0x8B -> UNDEFINED
+ '\ufffe' # 0x8C -> UNDEFINED
+ '\ufffe' # 0x8D -> UNDEFINED
+ '\ufffe' # 0x8E -> UNDEFINED
+ '\ufffe' # 0x8F -> UNDEFINED
+ '\ufffe' # 0x90 -> UNDEFINED
+ '\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK
+ '\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK
+ '\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK
+ '\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK
+ '\u2022' # 0x95 -> BULLET
+ '\u2013' # 0x96 -> EN DASH
+ '\u2014' # 0x97 -> EM DASH
+ '\ufffe' # 0x98 -> UNDEFINED
+ '\ufffe' # 0x99 -> UNDEFINED
+ '\ufffe' # 0x9A -> UNDEFINED
+ '\ufffe' # 0x9B -> UNDEFINED
+ '\ufffe' # 0x9C -> UNDEFINED
+ '\ufffe' # 0x9D -> UNDEFINED
+ '\ufffe' # 0x9E -> UNDEFINED
+ '\ufffe' # 0x9F -> UNDEFINED
+ '\xa0' # 0xA0 -> NO-BREAK SPACE
+ '\u0e01' # 0xA1 -> THAI CHARACTER KO KAI
+ '\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI
+ '\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT
+ '\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI
+ '\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON
+ '\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG
+ '\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU
+ '\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN
+ '\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING
+ '\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG
+ '\u0e0b' # 0xAB -> THAI CHARACTER SO SO
+ '\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE
+ '\u0e0d' # 0xAD -> THAI CHARACTER YO YING
+ '\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA
+ '\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK
+ '\u0e10' # 0xB0 -> THAI CHARACTER THO THAN
+ '\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO
+ '\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO
+ '\u0e13' # 0xB3 -> THAI CHARACTER NO NEN
+ '\u0e14' # 0xB4 -> THAI CHARACTER DO DEK
+ '\u0e15' # 0xB5 -> THAI CHARACTER TO TAO
+ '\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG
+ '\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN
+ '\u0e18' # 0xB8 -> THAI CHARACTER THO THONG
+ '\u0e19' # 0xB9 -> THAI CHARACTER NO NU
+ '\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI
+ '\u0e1b' # 0xBB -> THAI CHARACTER PO PLA
+ '\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG
+ '\u0e1d' # 0xBD -> THAI CHARACTER FO FA
+ '\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN
+ '\u0e1f' # 0xBF -> THAI CHARACTER FO FAN
+ '\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO
+ '\u0e21' # 0xC1 -> THAI CHARACTER MO MA
+ '\u0e22' # 0xC2 -> THAI CHARACTER YO YAK
+ '\u0e23' # 0xC3 -> THAI CHARACTER RO RUA
+ '\u0e24' # 0xC4 -> THAI CHARACTER RU
+ '\u0e25' # 0xC5 -> THAI CHARACTER LO LING
+ '\u0e26' # 0xC6 -> THAI CHARACTER LU
+ '\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN
+ '\u0e28' # 0xC8 -> THAI CHARACTER SO SALA
+ '\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI
+ '\u0e2a' # 0xCA -> THAI CHARACTER SO SUA
+ '\u0e2b' # 0xCB -> THAI CHARACTER HO HIP
+ '\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA
+ '\u0e2d' # 0xCD -> THAI CHARACTER O ANG
+ '\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK
+ '\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI
+ '\u0e30' # 0xD0 -> THAI CHARACTER SARA A
+ '\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT
+ '\u0e32' # 0xD2 -> THAI CHARACTER SARA AA
+ '\u0e33' # 0xD3 -> THAI CHARACTER SARA AM
+ '\u0e34' # 0xD4 -> THAI CHARACTER SARA I
+ '\u0e35' # 0xD5 -> THAI CHARACTER SARA II
+ '\u0e36' # 0xD6 -> THAI CHARACTER SARA UE
+ '\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE
+ '\u0e38' # 0xD8 -> THAI CHARACTER SARA U
+ '\u0e39' # 0xD9 -> THAI CHARACTER SARA UU
+ '\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU
+ '\ufffe' # 0xDB -> UNDEFINED
+ '\ufffe' # 0xDC -> UNDEFINED
+ '\ufffe' # 0xDD -> UNDEFINED
+ '\ufffe' # 0xDE -> UNDEFINED
+ '\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT
+ '\u0e40' # 0xE0 -> THAI CHARACTER SARA E
+ '\u0e41' # 0xE1 -> THAI CHARACTER SARA AE
+ '\u0e42' # 0xE2 -> THAI CHARACTER SARA O
+ '\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN
+ '\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI
+ '\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO
+ '\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK
+ '\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU
+ '\u0e48' # 0xE8 -> THAI CHARACTER MAI EK
+ '\u0e49' # 0xE9 -> THAI CHARACTER MAI THO
+ '\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI
+ '\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA
+ '\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT
+ '\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT
+ '\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN
+ '\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN
+ '\u0e50' # 0xF0 -> THAI DIGIT ZERO
+ '\u0e51' # 0xF1 -> THAI DIGIT ONE
+ '\u0e52' # 0xF2 -> THAI DIGIT TWO
+ '\u0e53' # 0xF3 -> THAI DIGIT THREE
+ '\u0e54' # 0xF4 -> THAI DIGIT FOUR
+ '\u0e55' # 0xF5 -> THAI DIGIT FIVE
+ '\u0e56' # 0xF6 -> THAI DIGIT SIX
+ '\u0e57' # 0xF7 -> THAI DIGIT SEVEN
+ '\u0e58' # 0xF8 -> THAI DIGIT EIGHT
+ '\u0e59' # 0xF9 -> THAI DIGIT NINE
+ '\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU
+ '\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT
+ '\ufffe' # 0xFC -> UNDEFINED
+ '\ufffe' # 0xFD -> UNDEFINED
+ '\ufffe' # 0xFE -> UNDEFINED
+ '\ufffe' # 0xFF -> UNDEFINED
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp875.py b/dist/lib/encodings/cp875.py
new file mode 100644
index 0000000..c25a5a4
--- /dev/null
+++ b/dist/lib/encodings/cp875.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec cp875 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp875',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x9c' # 0x04 -> CONTROL
+ '\t' # 0x05 -> HORIZONTAL TABULATION
+ '\x86' # 0x06 -> CONTROL
+ '\x7f' # 0x07 -> DELETE
+ '\x97' # 0x08 -> CONTROL
+ '\x8d' # 0x09 -> CONTROL
+ '\x8e' # 0x0A -> CONTROL
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x9d' # 0x14 -> CONTROL
+ '\x85' # 0x15 -> CONTROL
+ '\x08' # 0x16 -> BACKSPACE
+ '\x87' # 0x17 -> CONTROL
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x92' # 0x1A -> CONTROL
+ '\x8f' # 0x1B -> CONTROL
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ '\x80' # 0x20 -> CONTROL
+ '\x81' # 0x21 -> CONTROL
+ '\x82' # 0x22 -> CONTROL
+ '\x83' # 0x23 -> CONTROL
+ '\x84' # 0x24 -> CONTROL
+ '\n' # 0x25 -> LINE FEED
+ '\x17' # 0x26 -> END OF TRANSMISSION BLOCK
+ '\x1b' # 0x27 -> ESCAPE
+ '\x88' # 0x28 -> CONTROL
+ '\x89' # 0x29 -> CONTROL
+ '\x8a' # 0x2A -> CONTROL
+ '\x8b' # 0x2B -> CONTROL
+ '\x8c' # 0x2C -> CONTROL
+ '\x05' # 0x2D -> ENQUIRY
+ '\x06' # 0x2E -> ACKNOWLEDGE
+ '\x07' # 0x2F -> BELL
+ '\x90' # 0x30 -> CONTROL
+ '\x91' # 0x31 -> CONTROL
+ '\x16' # 0x32 -> SYNCHRONOUS IDLE
+ '\x93' # 0x33 -> CONTROL
+ '\x94' # 0x34 -> CONTROL
+ '\x95' # 0x35 -> CONTROL
+ '\x96' # 0x36 -> CONTROL
+ '\x04' # 0x37 -> END OF TRANSMISSION
+ '\x98' # 0x38 -> CONTROL
+ '\x99' # 0x39 -> CONTROL
+ '\x9a' # 0x3A -> CONTROL
+ '\x9b' # 0x3B -> CONTROL
+ '\x14' # 0x3C -> DEVICE CONTROL FOUR
+ '\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE
+ '\x9e' # 0x3E -> CONTROL
+ '\x1a' # 0x3F -> SUBSTITUTE
+ ' ' # 0x40 -> SPACE
+ '\u0391' # 0x41 -> GREEK CAPITAL LETTER ALPHA
+ '\u0392' # 0x42 -> GREEK CAPITAL LETTER BETA
+ '\u0393' # 0x43 -> GREEK CAPITAL LETTER GAMMA
+ '\u0394' # 0x44 -> GREEK CAPITAL LETTER DELTA
+ '\u0395' # 0x45 -> GREEK CAPITAL LETTER EPSILON
+ '\u0396' # 0x46 -> GREEK CAPITAL LETTER ZETA
+ '\u0397' # 0x47 -> GREEK CAPITAL LETTER ETA
+ '\u0398' # 0x48 -> GREEK CAPITAL LETTER THETA
+ '\u0399' # 0x49 -> GREEK CAPITAL LETTER IOTA
+ '[' # 0x4A -> LEFT SQUARE BRACKET
+ '.' # 0x4B -> FULL STOP
+ '<' # 0x4C -> LESS-THAN SIGN
+ '(' # 0x4D -> LEFT PARENTHESIS
+ '+' # 0x4E -> PLUS SIGN
+ '!' # 0x4F -> EXCLAMATION MARK
+ '&' # 0x50 -> AMPERSAND
+ '\u039a' # 0x51 -> GREEK CAPITAL LETTER KAPPA
+ '\u039b' # 0x52 -> GREEK CAPITAL LETTER LAMDA
+ '\u039c' # 0x53 -> GREEK CAPITAL LETTER MU
+ '\u039d' # 0x54 -> GREEK CAPITAL LETTER NU
+ '\u039e' # 0x55 -> GREEK CAPITAL LETTER XI
+ '\u039f' # 0x56 -> GREEK CAPITAL LETTER OMICRON
+ '\u03a0' # 0x57 -> GREEK CAPITAL LETTER PI
+ '\u03a1' # 0x58 -> GREEK CAPITAL LETTER RHO
+ '\u03a3' # 0x59 -> GREEK CAPITAL LETTER SIGMA
+ ']' # 0x5A -> RIGHT SQUARE BRACKET
+ '$' # 0x5B -> DOLLAR SIGN
+ '*' # 0x5C -> ASTERISK
+ ')' # 0x5D -> RIGHT PARENTHESIS
+ ';' # 0x5E -> SEMICOLON
+ '^' # 0x5F -> CIRCUMFLEX ACCENT
+ '-' # 0x60 -> HYPHEN-MINUS
+ '/' # 0x61 -> SOLIDUS
+ '\u03a4' # 0x62 -> GREEK CAPITAL LETTER TAU
+ '\u03a5' # 0x63 -> GREEK CAPITAL LETTER UPSILON
+ '\u03a6' # 0x64 -> GREEK CAPITAL LETTER PHI
+ '\u03a7' # 0x65 -> GREEK CAPITAL LETTER CHI
+ '\u03a8' # 0x66 -> GREEK CAPITAL LETTER PSI
+ '\u03a9' # 0x67 -> GREEK CAPITAL LETTER OMEGA
+ '\u03aa' # 0x68 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+ '\u03ab' # 0x69 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+ '|' # 0x6A -> VERTICAL LINE
+ ',' # 0x6B -> COMMA
+ '%' # 0x6C -> PERCENT SIGN
+ '_' # 0x6D -> LOW LINE
+ '>' # 0x6E -> GREATER-THAN SIGN
+ '?' # 0x6F -> QUESTION MARK
+ '\xa8' # 0x70 -> DIAERESIS
+ '\u0386' # 0x71 -> GREEK CAPITAL LETTER ALPHA WITH TONOS
+ '\u0388' # 0x72 -> GREEK CAPITAL LETTER EPSILON WITH TONOS
+ '\u0389' # 0x73 -> GREEK CAPITAL LETTER ETA WITH TONOS
+ '\xa0' # 0x74 -> NO-BREAK SPACE
+ '\u038a' # 0x75 -> GREEK CAPITAL LETTER IOTA WITH TONOS
+ '\u038c' # 0x76 -> GREEK CAPITAL LETTER OMICRON WITH TONOS
+ '\u038e' # 0x77 -> GREEK CAPITAL LETTER UPSILON WITH TONOS
+ '\u038f' # 0x78 -> GREEK CAPITAL LETTER OMEGA WITH TONOS
+ '`' # 0x79 -> GRAVE ACCENT
+ ':' # 0x7A -> COLON
+ '#' # 0x7B -> NUMBER SIGN
+ '@' # 0x7C -> COMMERCIAL AT
+ "'" # 0x7D -> APOSTROPHE
+ '=' # 0x7E -> EQUALS SIGN
+ '"' # 0x7F -> QUOTATION MARK
+ '\u0385' # 0x80 -> GREEK DIALYTIKA TONOS
+ 'a' # 0x81 -> LATIN SMALL LETTER A
+ 'b' # 0x82 -> LATIN SMALL LETTER B
+ 'c' # 0x83 -> LATIN SMALL LETTER C
+ 'd' # 0x84 -> LATIN SMALL LETTER D
+ 'e' # 0x85 -> LATIN SMALL LETTER E
+ 'f' # 0x86 -> LATIN SMALL LETTER F
+ 'g' # 0x87 -> LATIN SMALL LETTER G
+ 'h' # 0x88 -> LATIN SMALL LETTER H
+ 'i' # 0x89 -> LATIN SMALL LETTER I
+ '\u03b1' # 0x8A -> GREEK SMALL LETTER ALPHA
+ '\u03b2' # 0x8B -> GREEK SMALL LETTER BETA
+ '\u03b3' # 0x8C -> GREEK SMALL LETTER GAMMA
+ '\u03b4' # 0x8D -> GREEK SMALL LETTER DELTA
+ '\u03b5' # 0x8E -> GREEK SMALL LETTER EPSILON
+ '\u03b6' # 0x8F -> GREEK SMALL LETTER ZETA
+ '\xb0' # 0x90 -> DEGREE SIGN
+ 'j' # 0x91 -> LATIN SMALL LETTER J
+ 'k' # 0x92 -> LATIN SMALL LETTER K
+ 'l' # 0x93 -> LATIN SMALL LETTER L
+ 'm' # 0x94 -> LATIN SMALL LETTER M
+ 'n' # 0x95 -> LATIN SMALL LETTER N
+ 'o' # 0x96 -> LATIN SMALL LETTER O
+ 'p' # 0x97 -> LATIN SMALL LETTER P
+ 'q' # 0x98 -> LATIN SMALL LETTER Q
+ 'r' # 0x99 -> LATIN SMALL LETTER R
+ '\u03b7' # 0x9A -> GREEK SMALL LETTER ETA
+ '\u03b8' # 0x9B -> GREEK SMALL LETTER THETA
+ '\u03b9' # 0x9C -> GREEK SMALL LETTER IOTA
+ '\u03ba' # 0x9D -> GREEK SMALL LETTER KAPPA
+ '\u03bb' # 0x9E -> GREEK SMALL LETTER LAMDA
+ '\u03bc' # 0x9F -> GREEK SMALL LETTER MU
+ '\xb4' # 0xA0 -> ACUTE ACCENT
+ '~' # 0xA1 -> TILDE
+ 's' # 0xA2 -> LATIN SMALL LETTER S
+ 't' # 0xA3 -> LATIN SMALL LETTER T
+ 'u' # 0xA4 -> LATIN SMALL LETTER U
+ 'v' # 0xA5 -> LATIN SMALL LETTER V
+ 'w' # 0xA6 -> LATIN SMALL LETTER W
+ 'x' # 0xA7 -> LATIN SMALL LETTER X
+ 'y' # 0xA8 -> LATIN SMALL LETTER Y
+ 'z' # 0xA9 -> LATIN SMALL LETTER Z
+ '\u03bd' # 0xAA -> GREEK SMALL LETTER NU
+ '\u03be' # 0xAB -> GREEK SMALL LETTER XI
+ '\u03bf' # 0xAC -> GREEK SMALL LETTER OMICRON
+ '\u03c0' # 0xAD -> GREEK SMALL LETTER PI
+ '\u03c1' # 0xAE -> GREEK SMALL LETTER RHO
+ '\u03c3' # 0xAF -> GREEK SMALL LETTER SIGMA
+ '\xa3' # 0xB0 -> POUND SIGN
+ '\u03ac' # 0xB1 -> GREEK SMALL LETTER ALPHA WITH TONOS
+ '\u03ad' # 0xB2 -> GREEK SMALL LETTER EPSILON WITH TONOS
+ '\u03ae' # 0xB3 -> GREEK SMALL LETTER ETA WITH TONOS
+ '\u03ca' # 0xB4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
+ '\u03af' # 0xB5 -> GREEK SMALL LETTER IOTA WITH TONOS
+ '\u03cc' # 0xB6 -> GREEK SMALL LETTER OMICRON WITH TONOS
+ '\u03cd' # 0xB7 -> GREEK SMALL LETTER UPSILON WITH TONOS
+ '\u03cb' # 0xB8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+ '\u03ce' # 0xB9 -> GREEK SMALL LETTER OMEGA WITH TONOS
+ '\u03c2' # 0xBA -> GREEK SMALL LETTER FINAL SIGMA
+ '\u03c4' # 0xBB -> GREEK SMALL LETTER TAU
+ '\u03c5' # 0xBC -> GREEK SMALL LETTER UPSILON
+ '\u03c6' # 0xBD -> GREEK SMALL LETTER PHI
+ '\u03c7' # 0xBE -> GREEK SMALL LETTER CHI
+ '\u03c8' # 0xBF -> GREEK SMALL LETTER PSI
+ '{' # 0xC0 -> LEFT CURLY BRACKET
+ 'A' # 0xC1 -> LATIN CAPITAL LETTER A
+ 'B' # 0xC2 -> LATIN CAPITAL LETTER B
+ 'C' # 0xC3 -> LATIN CAPITAL LETTER C
+ 'D' # 0xC4 -> LATIN CAPITAL LETTER D
+ 'E' # 0xC5 -> LATIN CAPITAL LETTER E
+ 'F' # 0xC6 -> LATIN CAPITAL LETTER F
+ 'G' # 0xC7 -> LATIN CAPITAL LETTER G
+ 'H' # 0xC8 -> LATIN CAPITAL LETTER H
+ 'I' # 0xC9 -> LATIN CAPITAL LETTER I
+ '\xad' # 0xCA -> SOFT HYPHEN
+ '\u03c9' # 0xCB -> GREEK SMALL LETTER OMEGA
+ '\u0390' # 0xCC -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+ '\u03b0' # 0xCD -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+ '\u2018' # 0xCE -> LEFT SINGLE QUOTATION MARK
+ '\u2015' # 0xCF -> HORIZONTAL BAR
+ '}' # 0xD0 -> RIGHT CURLY BRACKET
+ 'J' # 0xD1 -> LATIN CAPITAL LETTER J
+ 'K' # 0xD2 -> LATIN CAPITAL LETTER K
+ 'L' # 0xD3 -> LATIN CAPITAL LETTER L
+ 'M' # 0xD4 -> LATIN CAPITAL LETTER M
+ 'N' # 0xD5 -> LATIN CAPITAL LETTER N
+ 'O' # 0xD6 -> LATIN CAPITAL LETTER O
+ 'P' # 0xD7 -> LATIN CAPITAL LETTER P
+ 'Q' # 0xD8 -> LATIN CAPITAL LETTER Q
+ 'R' # 0xD9 -> LATIN CAPITAL LETTER R
+ '\xb1' # 0xDA -> PLUS-MINUS SIGN
+ '\xbd' # 0xDB -> VULGAR FRACTION ONE HALF
+ '\x1a' # 0xDC -> SUBSTITUTE
+ '\u0387' # 0xDD -> GREEK ANO TELEIA
+ '\u2019' # 0xDE -> RIGHT SINGLE QUOTATION MARK
+ '\xa6' # 0xDF -> BROKEN BAR
+ '\\' # 0xE0 -> REVERSE SOLIDUS
+ '\x1a' # 0xE1 -> SUBSTITUTE
+ 'S' # 0xE2 -> LATIN CAPITAL LETTER S
+ 'T' # 0xE3 -> LATIN CAPITAL LETTER T
+ 'U' # 0xE4 -> LATIN CAPITAL LETTER U
+ 'V' # 0xE5 -> LATIN CAPITAL LETTER V
+ 'W' # 0xE6 -> LATIN CAPITAL LETTER W
+ 'X' # 0xE7 -> LATIN CAPITAL LETTER X
+ 'Y' # 0xE8 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0xE9 -> LATIN CAPITAL LETTER Z
+ '\xb2' # 0xEA -> SUPERSCRIPT TWO
+ '\xa7' # 0xEB -> SECTION SIGN
+ '\x1a' # 0xEC -> SUBSTITUTE
+ '\x1a' # 0xED -> SUBSTITUTE
+ '\xab' # 0xEE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xac' # 0xEF -> NOT SIGN
+ '0' # 0xF0 -> DIGIT ZERO
+ '1' # 0xF1 -> DIGIT ONE
+ '2' # 0xF2 -> DIGIT TWO
+ '3' # 0xF3 -> DIGIT THREE
+ '4' # 0xF4 -> DIGIT FOUR
+ '5' # 0xF5 -> DIGIT FIVE
+ '6' # 0xF6 -> DIGIT SIX
+ '7' # 0xF7 -> DIGIT SEVEN
+ '8' # 0xF8 -> DIGIT EIGHT
+ '9' # 0xF9 -> DIGIT NINE
+ '\xb3' # 0xFA -> SUPERSCRIPT THREE
+ '\xa9' # 0xFB -> COPYRIGHT SIGN
+ '\x1a' # 0xFC -> SUBSTITUTE
+ '\x1a' # 0xFD -> SUBSTITUTE
+ '\xbb' # 0xFE -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\x9f' # 0xFF -> CONTROL
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/cp932.py b/dist/lib/encodings/cp932.py
new file mode 100644
index 0000000..e01f59b
--- /dev/null
+++ b/dist/lib/encodings/cp932.py
@@ -0,0 +1,39 @@
+#
+# cp932.py: Python Unicode Codec for CP932
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_jp, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_jp.getcodec('cp932')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp932',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/cp949.py b/dist/lib/encodings/cp949.py
new file mode 100644
index 0000000..627c871
--- /dev/null
+++ b/dist/lib/encodings/cp949.py
@@ -0,0 +1,39 @@
+#
+# cp949.py: Python Unicode Codec for CP949
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_kr, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_kr.getcodec('cp949')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp949',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/cp950.py b/dist/lib/encodings/cp950.py
new file mode 100644
index 0000000..39eec5e
--- /dev/null
+++ b/dist/lib/encodings/cp950.py
@@ -0,0 +1,39 @@
+#
+# cp950.py: Python Unicode Codec for CP950
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_tw, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_tw.getcodec('cp950')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='cp950',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/euc_jis_2004.py b/dist/lib/encodings/euc_jis_2004.py
new file mode 100644
index 0000000..72b87ae
--- /dev/null
+++ b/dist/lib/encodings/euc_jis_2004.py
@@ -0,0 +1,39 @@
+#
+# euc_jis_2004.py: Python Unicode Codec for EUC_JIS_2004
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_jp, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_jp.getcodec('euc_jis_2004')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='euc_jis_2004',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/euc_jisx0213.py b/dist/lib/encodings/euc_jisx0213.py
new file mode 100644
index 0000000..cc47d04
--- /dev/null
+++ b/dist/lib/encodings/euc_jisx0213.py
@@ -0,0 +1,39 @@
+#
+# euc_jisx0213.py: Python Unicode Codec for EUC_JISX0213
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_jp, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_jp.getcodec('euc_jisx0213')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='euc_jisx0213',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/euc_jp.py b/dist/lib/encodings/euc_jp.py
new file mode 100644
index 0000000..7bcbe41
--- /dev/null
+++ b/dist/lib/encodings/euc_jp.py
@@ -0,0 +1,39 @@
+#
+# euc_jp.py: Python Unicode Codec for EUC_JP
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_jp, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_jp.getcodec('euc_jp')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='euc_jp',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/euc_kr.py b/dist/lib/encodings/euc_kr.py
new file mode 100644
index 0000000..c1fb126
--- /dev/null
+++ b/dist/lib/encodings/euc_kr.py
@@ -0,0 +1,39 @@
+#
+# euc_kr.py: Python Unicode Codec for EUC_KR
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_kr, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_kr.getcodec('euc_kr')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='euc_kr',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/gb18030.py b/dist/lib/encodings/gb18030.py
new file mode 100644
index 0000000..34fb6c3
--- /dev/null
+++ b/dist/lib/encodings/gb18030.py
@@ -0,0 +1,39 @@
+#
+# gb18030.py: Python Unicode Codec for GB18030
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_cn, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_cn.getcodec('gb18030')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='gb18030',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/gb2312.py b/dist/lib/encodings/gb2312.py
new file mode 100644
index 0000000..3c3b837
--- /dev/null
+++ b/dist/lib/encodings/gb2312.py
@@ -0,0 +1,39 @@
+#
+# gb2312.py: Python Unicode Codec for GB2312
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_cn, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_cn.getcodec('gb2312')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='gb2312',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/gbk.py b/dist/lib/encodings/gbk.py
new file mode 100644
index 0000000..1b45db8
--- /dev/null
+++ b/dist/lib/encodings/gbk.py
@@ -0,0 +1,39 @@
+#
+# gbk.py: Python Unicode Codec for GBK
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_cn, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_cn.getcodec('gbk')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='gbk',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/hex_codec.py b/dist/lib/encodings/hex_codec.py
new file mode 100644
index 0000000..9fb1072
--- /dev/null
+++ b/dist/lib/encodings/hex_codec.py
@@ -0,0 +1,55 @@
+"""Python 'hex_codec' Codec - 2-digit hex content transfer encoding.
+
+This codec de/encodes from bytes to bytes.
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+"""
+
+import codecs
+import binascii
+
+### Codec APIs
+
+def hex_encode(input, errors='strict'):
+ assert errors == 'strict'
+ return (binascii.b2a_hex(input), len(input))
+
+def hex_decode(input, errors='strict'):
+ assert errors == 'strict'
+ return (binascii.a2b_hex(input), len(input))
+
+class Codec(codecs.Codec):
+ def encode(self, input, errors='strict'):
+ return hex_encode(input, errors)
+ def decode(self, input, errors='strict'):
+ return hex_decode(input, errors)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ assert self.errors == 'strict'
+ return binascii.b2a_hex(input)
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ assert self.errors == 'strict'
+ return binascii.a2b_hex(input)
+
+class StreamWriter(Codec, codecs.StreamWriter):
+ charbuffertype = bytes
+
+class StreamReader(Codec, codecs.StreamReader):
+ charbuffertype = bytes
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='hex',
+ encode=hex_encode,
+ decode=hex_decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamwriter=StreamWriter,
+ streamreader=StreamReader,
+ _is_text_encoding=False,
+ )
diff --git a/dist/lib/encodings/hp_roman8.py b/dist/lib/encodings/hp_roman8.py
new file mode 100644
index 0000000..58de103
--- /dev/null
+++ b/dist/lib/encodings/hp_roman8.py
@@ -0,0 +1,314 @@
+""" Python Character Mapping Codec generated from 'hp_roman8.txt' with gencodec.py.
+
+ Based on data from ftp://dkuug.dk/i18n/charmaps/HP-ROMAN8 (Keld Simonsen)
+
+ Original source: LaserJet IIP Printer User's Manual HP part no
+ 33471-90901, Hewlet-Packard, June 1989.
+
+ (Used with permission)
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='hp-roman8',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamwriter=StreamWriter,
+ streamreader=StreamReader,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x04' # 0x04 -> END OF TRANSMISSION
+ '\x05' # 0x05 -> ENQUIRY
+ '\x06' # 0x06 -> ACKNOWLEDGE
+ '\x07' # 0x07 -> BELL
+ '\x08' # 0x08 -> BACKSPACE
+ '\t' # 0x09 -> HORIZONTAL TABULATION
+ '\n' # 0x0A -> LINE FEED
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x14' # 0x14 -> DEVICE CONTROL FOUR
+ '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x16 -> SYNCHRONOUS IDLE
+ '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x1a' # 0x1A -> SUBSTITUTE
+ '\x1b' # 0x1B -> ESCAPE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> DELETE
+ '\x80' # 0x80 ->
+ '\x81' # 0x81 ->
+ '\x82' # 0x82 ->
+ '\x83' # 0x83 ->
+ '\x84' # 0x84 ->
+ '\x85' # 0x85 ->
+ '\x86' # 0x86 ->
+ '\x87' # 0x87 ->
+ '\x88' # 0x88 ->
+ '\x89' # 0x89 ->
+ '\x8a' # 0x8A ->
+ '\x8b' # 0x8B ->
+ '\x8c' # 0x8C ->
+ '\x8d' # 0x8D ->
+ '\x8e' # 0x8E ->
+ '\x8f' # 0x8F ->
+ '\x90' # 0x90 ->
+ '\x91' # 0x91 ->
+ '\x92' # 0x92 ->
+ '\x93' # 0x93 ->
+ '\x94' # 0x94 ->
+ '\x95' # 0x95 ->
+ '\x96' # 0x96 ->
+ '\x97' # 0x97 ->
+ '\x98' # 0x98 ->
+ '\x99' # 0x99 ->
+ '\x9a' # 0x9A ->
+ '\x9b' # 0x9B ->
+ '\x9c' # 0x9C ->
+ '\x9d' # 0x9D ->
+ '\x9e' # 0x9E ->
+ '\x9f' # 0x9F ->
+ '\xa0' # 0xA0 -> NO-BREAK SPACE
+ '\xc0' # 0xA1 -> LATIN CAPITAL LETTER A WITH GRAVE
+ '\xc2' # 0xA2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ '\xc8' # 0xA3 -> LATIN CAPITAL LETTER E WITH GRAVE
+ '\xca' # 0xA4 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ '\xcb' # 0xA5 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ '\xce' # 0xA6 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ '\xcf' # 0xA7 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ '\xb4' # 0xA8 -> ACUTE ACCENT
+ '\u02cb' # 0xA9 -> MODIFIER LETTER GRAVE ACCENT (MANDARIN CHINESE FOURTH TONE)
+ '\u02c6' # 0xAA -> MODIFIER LETTER CIRCUMFLEX ACCENT
+ '\xa8' # 0xAB -> DIAERESIS
+ '\u02dc' # 0xAC -> SMALL TILDE
+ '\xd9' # 0xAD -> LATIN CAPITAL LETTER U WITH GRAVE
+ '\xdb' # 0xAE -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ '\u20a4' # 0xAF -> LIRA SIGN
+ '\xaf' # 0xB0 -> MACRON
+ '\xdd' # 0xB1 -> LATIN CAPITAL LETTER Y WITH ACUTE
+ '\xfd' # 0xB2 -> LATIN SMALL LETTER Y WITH ACUTE
+ '\xb0' # 0xB3 -> DEGREE SIGN
+ '\xc7' # 0xB4 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xe7' # 0xB5 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xd1' # 0xB6 -> LATIN CAPITAL LETTER N WITH TILDE
+ '\xf1' # 0xB7 -> LATIN SMALL LETTER N WITH TILDE
+ '\xa1' # 0xB8 -> INVERTED EXCLAMATION MARK
+ '\xbf' # 0xB9 -> INVERTED QUESTION MARK
+ '\xa4' # 0xBA -> CURRENCY SIGN
+ '\xa3' # 0xBB -> POUND SIGN
+ '\xa5' # 0xBC -> YEN SIGN
+ '\xa7' # 0xBD -> SECTION SIGN
+ '\u0192' # 0xBE -> LATIN SMALL LETTER F WITH HOOK
+ '\xa2' # 0xBF -> CENT SIGN
+ '\xe2' # 0xC0 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\xea' # 0xC1 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xf4' # 0xC2 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xfb' # 0xC3 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\xe1' # 0xC4 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xe9' # 0xC5 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xf3' # 0xC6 -> LATIN SMALL LETTER O WITH ACUTE
+ '\xfa' # 0xC7 -> LATIN SMALL LETTER U WITH ACUTE
+ '\xe0' # 0xC8 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xe8' # 0xC9 -> LATIN SMALL LETTER E WITH GRAVE
+ '\xf2' # 0xCA -> LATIN SMALL LETTER O WITH GRAVE
+ '\xf9' # 0xCB -> LATIN SMALL LETTER U WITH GRAVE
+ '\xe4' # 0xCC -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\xeb' # 0xCD -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\xf6' # 0xCE -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\xfc' # 0xCF -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xc5' # 0xD0 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '\xee' # 0xD1 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\xd8' # 0xD2 -> LATIN CAPITAL LETTER O WITH STROKE
+ '\xc6' # 0xD3 -> LATIN CAPITAL LETTER AE
+ '\xe5' # 0xD4 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '\xed' # 0xD5 -> LATIN SMALL LETTER I WITH ACUTE
+ '\xf8' # 0xD6 -> LATIN SMALL LETTER O WITH STROKE
+ '\xe6' # 0xD7 -> LATIN SMALL LETTER AE
+ '\xc4' # 0xD8 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\xec' # 0xD9 -> LATIN SMALL LETTER I WITH GRAVE
+ '\xd6' # 0xDA -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xdc' # 0xDB -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xc9' # 0xDC -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xef' # 0xDD -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\xdf' # 0xDE -> LATIN SMALL LETTER SHARP S (GERMAN)
+ '\xd4' # 0xDF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '\xc1' # 0xE0 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\xc3' # 0xE1 -> LATIN CAPITAL LETTER A WITH TILDE
+ '\xe3' # 0xE2 -> LATIN SMALL LETTER A WITH TILDE
+ '\xd0' # 0xE3 -> LATIN CAPITAL LETTER ETH (ICELANDIC)
+ '\xf0' # 0xE4 -> LATIN SMALL LETTER ETH (ICELANDIC)
+ '\xcd' # 0xE5 -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\xcc' # 0xE6 -> LATIN CAPITAL LETTER I WITH GRAVE
+ '\xd3' # 0xE7 -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xd2' # 0xE8 -> LATIN CAPITAL LETTER O WITH GRAVE
+ '\xd5' # 0xE9 -> LATIN CAPITAL LETTER O WITH TILDE
+ '\xf5' # 0xEA -> LATIN SMALL LETTER O WITH TILDE
+ '\u0160' # 0xEB -> LATIN CAPITAL LETTER S WITH CARON
+ '\u0161' # 0xEC -> LATIN SMALL LETTER S WITH CARON
+ '\xda' # 0xED -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\u0178' # 0xEE -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+ '\xff' # 0xEF -> LATIN SMALL LETTER Y WITH DIAERESIS
+ '\xde' # 0xF0 -> LATIN CAPITAL LETTER THORN (ICELANDIC)
+ '\xfe' # 0xF1 -> LATIN SMALL LETTER THORN (ICELANDIC)
+ '\xb7' # 0xF2 -> MIDDLE DOT
+ '\xb5' # 0xF3 -> MICRO SIGN
+ '\xb6' # 0xF4 -> PILCROW SIGN
+ '\xbe' # 0xF5 -> VULGAR FRACTION THREE QUARTERS
+ '\u2014' # 0xF6 -> EM DASH
+ '\xbc' # 0xF7 -> VULGAR FRACTION ONE QUARTER
+ '\xbd' # 0xF8 -> VULGAR FRACTION ONE HALF
+ '\xaa' # 0xF9 -> FEMININE ORDINAL INDICATOR
+ '\xba' # 0xFA -> MASCULINE ORDINAL INDICATOR
+ '\xab' # 0xFB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\u25a0' # 0xFC -> BLACK SQUARE
+ '\xbb' # 0xFD -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xb1' # 0xFE -> PLUS-MINUS SIGN
+ '\ufffe'
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/hz.py b/dist/lib/encodings/hz.py
new file mode 100644
index 0000000..383442a
--- /dev/null
+++ b/dist/lib/encodings/hz.py
@@ -0,0 +1,39 @@
+#
+# hz.py: Python Unicode Codec for HZ
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_cn, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_cn.getcodec('hz')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='hz',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/idna.py b/dist/lib/encodings/idna.py
new file mode 100644
index 0000000..ea40585
--- /dev/null
+++ b/dist/lib/encodings/idna.py
@@ -0,0 +1,309 @@
+# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)
+
+import stringprep, re, codecs
+from unicodedata import ucd_3_2_0 as unicodedata
+
+# IDNA section 3.1
+dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")
+
+# IDNA section 5
+ace_prefix = b"xn--"
+sace_prefix = "xn--"
+
+# This assumes query strings, so AllowUnassigned is true
+def nameprep(label):
+ # Map
+ newlabel = []
+ for c in label:
+ if stringprep.in_table_b1(c):
+ # Map to nothing
+ continue
+ newlabel.append(stringprep.map_table_b2(c))
+ label = "".join(newlabel)
+
+ # Normalize
+ label = unicodedata.normalize("NFKC", label)
+
+ # Prohibit
+ for c in label:
+ if stringprep.in_table_c12(c) or \
+ stringprep.in_table_c22(c) or \
+ stringprep.in_table_c3(c) or \
+ stringprep.in_table_c4(c) or \
+ stringprep.in_table_c5(c) or \
+ stringprep.in_table_c6(c) or \
+ stringprep.in_table_c7(c) or \
+ stringprep.in_table_c8(c) or \
+ stringprep.in_table_c9(c):
+ raise UnicodeError("Invalid character %r" % c)
+
+ # Check bidi
+ RandAL = [stringprep.in_table_d1(x) for x in label]
+ for c in RandAL:
+ if c:
+ # There is a RandAL char in the string. Must perform further
+ # tests:
+ # 1) The characters in section 5.8 MUST be prohibited.
+ # This is table C.8, which was already checked
+ # 2) If a string contains any RandALCat character, the string
+ # MUST NOT contain any LCat character.
+ if any(stringprep.in_table_d2(x) for x in label):
+ raise UnicodeError("Violation of BIDI requirement 2")
+
+ # 3) If a string contains any RandALCat character, a
+ # RandALCat character MUST be the first character of the
+ # string, and a RandALCat character MUST be the last
+ # character of the string.
+ if not RandAL[0] or not RandAL[-1]:
+ raise UnicodeError("Violation of BIDI requirement 3")
+
+ return label
+
+def ToASCII(label):
+ try:
+ # Step 1: try ASCII
+ label = label.encode("ascii")
+ except UnicodeError:
+ pass
+ else:
+ # Skip to step 3: UseSTD3ASCIIRules is false, so
+ # Skip to step 8.
+ if 0 < len(label) < 64:
+ return label
+ raise UnicodeError("label empty or too long")
+
+ # Step 2: nameprep
+ label = nameprep(label)
+
+ # Step 3: UseSTD3ASCIIRules is false
+ # Step 4: try ASCII
+ try:
+ label = label.encode("ascii")
+ except UnicodeError:
+ pass
+ else:
+ # Skip to step 8.
+ if 0 < len(label) < 64:
+ return label
+ raise UnicodeError("label empty or too long")
+
+ # Step 5: Check ACE prefix
+ if label.startswith(sace_prefix):
+ raise UnicodeError("Label starts with ACE prefix")
+
+ # Step 6: Encode with PUNYCODE
+ label = label.encode("punycode")
+
+ # Step 7: Prepend ACE prefix
+ label = ace_prefix + label
+
+ # Step 8: Check size
+ if 0 < len(label) < 64:
+ return label
+ raise UnicodeError("label empty or too long")
+
+def ToUnicode(label):
+ # Step 1: Check for ASCII
+ if isinstance(label, bytes):
+ pure_ascii = True
+ else:
+ try:
+ label = label.encode("ascii")
+ pure_ascii = True
+ except UnicodeError:
+ pure_ascii = False
+ if not pure_ascii:
+ # Step 2: Perform nameprep
+ label = nameprep(label)
+ # It doesn't say this, but apparently, it should be ASCII now
+ try:
+ label = label.encode("ascii")
+ except UnicodeError:
+ raise UnicodeError("Invalid character in IDN label")
+ # Step 3: Check for ACE prefix
+ if not label.startswith(ace_prefix):
+ return str(label, "ascii")
+
+ # Step 4: Remove ACE prefix
+ label1 = label[len(ace_prefix):]
+
+ # Step 5: Decode using PUNYCODE
+ result = label1.decode("punycode")
+
+ # Step 6: Apply ToASCII
+ label2 = ToASCII(result)
+
+ # Step 7: Compare the result of step 6 with the one of step 3
+ # label2 will already be in lower case.
+ if str(label, "ascii").lower() != str(label2, "ascii"):
+ raise UnicodeError("IDNA does not round-trip", label, label2)
+
+ # Step 8: return the result of step 5
+ return result
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+ def encode(self, input, errors='strict'):
+
+ if errors != 'strict':
+ # IDNA is quite clear that implementations must be strict
+ raise UnicodeError("unsupported error handling "+errors)
+
+ if not input:
+ return b'', 0
+
+ try:
+ result = input.encode('ascii')
+ except UnicodeEncodeError:
+ pass
+ else:
+ # ASCII name: fast path
+ labels = result.split(b'.')
+ for label in labels[:-1]:
+ if not (0 < len(label) < 64):
+ raise UnicodeError("label empty or too long")
+ if len(labels[-1]) >= 64:
+ raise UnicodeError("label too long")
+ return result, len(input)
+
+ result = bytearray()
+ labels = dots.split(input)
+ if labels and not labels[-1]:
+ trailing_dot = b'.'
+ del labels[-1]
+ else:
+ trailing_dot = b''
+ for label in labels:
+ if result:
+ # Join with U+002E
+ result.extend(b'.')
+ result.extend(ToASCII(label))
+ return bytes(result+trailing_dot), len(input)
+
+ def decode(self, input, errors='strict'):
+
+ if errors != 'strict':
+ raise UnicodeError("Unsupported error handling "+errors)
+
+ if not input:
+ return "", 0
+
+ # IDNA allows decoding to operate on Unicode strings, too.
+ if not isinstance(input, bytes):
+ # XXX obviously wrong, see #3232
+ input = bytes(input)
+
+ if ace_prefix not in input:
+ # Fast path
+ try:
+ return input.decode('ascii'), len(input)
+ except UnicodeDecodeError:
+ pass
+
+ labels = input.split(b".")
+
+ if labels and len(labels[-1]) == 0:
+ trailing_dot = '.'
+ del labels[-1]
+ else:
+ trailing_dot = ''
+
+ result = []
+ for label in labels:
+ result.append(ToUnicode(label))
+
+ return ".".join(result)+trailing_dot, len(input)
+
+class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
+ def _buffer_encode(self, input, errors, final):
+ if errors != 'strict':
+ # IDNA is quite clear that implementations must be strict
+ raise UnicodeError("unsupported error handling "+errors)
+
+ if not input:
+ return (b'', 0)
+
+ labels = dots.split(input)
+ trailing_dot = b''
+ if labels:
+ if not labels[-1]:
+ trailing_dot = b'.'
+ del labels[-1]
+ elif not final:
+ # Keep potentially unfinished label until the next call
+ del labels[-1]
+ if labels:
+ trailing_dot = b'.'
+
+ result = bytearray()
+ size = 0
+ for label in labels:
+ if size:
+ # Join with U+002E
+ result.extend(b'.')
+ size += 1
+ result.extend(ToASCII(label))
+ size += len(label)
+
+ result += trailing_dot
+ size += len(trailing_dot)
+ return (bytes(result), size)
+
+class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
+ def _buffer_decode(self, input, errors, final):
+ if errors != 'strict':
+ raise UnicodeError("Unsupported error handling "+errors)
+
+ if not input:
+ return ("", 0)
+
+ # IDNA allows decoding to operate on Unicode strings, too.
+ if isinstance(input, str):
+ labels = dots.split(input)
+ else:
+ # Must be ASCII string
+ input = str(input, "ascii")
+ labels = input.split(".")
+
+ trailing_dot = ''
+ if labels:
+ if not labels[-1]:
+ trailing_dot = '.'
+ del labels[-1]
+ elif not final:
+ # Keep potentially unfinished label until the next call
+ del labels[-1]
+ if labels:
+ trailing_dot = '.'
+
+ result = []
+ size = 0
+ for label in labels:
+ result.append(ToUnicode(label))
+ if size:
+ size += 1
+ size += len(label)
+
+ result = ".".join(result) + trailing_dot
+ size += len(trailing_dot)
+ return (result, size)
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='idna',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamwriter=StreamWriter,
+ streamreader=StreamReader,
+ )
diff --git a/dist/lib/encodings/iso2022_jp.py b/dist/lib/encodings/iso2022_jp.py
new file mode 100644
index 0000000..ab04060
--- /dev/null
+++ b/dist/lib/encodings/iso2022_jp.py
@@ -0,0 +1,39 @@
+#
+# iso2022_jp.py: Python Unicode Codec for ISO2022_JP
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_iso2022, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_iso2022.getcodec('iso2022_jp')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='iso2022_jp',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/iso2022_jp_1.py b/dist/lib/encodings/iso2022_jp_1.py
new file mode 100644
index 0000000..997044d
--- /dev/null
+++ b/dist/lib/encodings/iso2022_jp_1.py
@@ -0,0 +1,39 @@
+#
+# iso2022_jp_1.py: Python Unicode Codec for ISO2022_JP_1
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_iso2022, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_iso2022.getcodec('iso2022_jp_1')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='iso2022_jp_1',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/iso2022_jp_2.py b/dist/lib/encodings/iso2022_jp_2.py
new file mode 100644
index 0000000..9106bf7
--- /dev/null
+++ b/dist/lib/encodings/iso2022_jp_2.py
@@ -0,0 +1,39 @@
+#
+# iso2022_jp_2.py: Python Unicode Codec for ISO2022_JP_2
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_iso2022, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_iso2022.getcodec('iso2022_jp_2')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='iso2022_jp_2',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/iso2022_jp_2004.py b/dist/lib/encodings/iso2022_jp_2004.py
new file mode 100644
index 0000000..40198bf
--- /dev/null
+++ b/dist/lib/encodings/iso2022_jp_2004.py
@@ -0,0 +1,39 @@
+#
+# iso2022_jp_2004.py: Python Unicode Codec for ISO2022_JP_2004
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_iso2022, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_iso2022.getcodec('iso2022_jp_2004')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='iso2022_jp_2004',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/iso2022_jp_3.py b/dist/lib/encodings/iso2022_jp_3.py
new file mode 100644
index 0000000..346e08b
--- /dev/null
+++ b/dist/lib/encodings/iso2022_jp_3.py
@@ -0,0 +1,39 @@
+#
+# iso2022_jp_3.py: Python Unicode Codec for ISO2022_JP_3
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_iso2022, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_iso2022.getcodec('iso2022_jp_3')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='iso2022_jp_3',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/iso2022_jp_ext.py b/dist/lib/encodings/iso2022_jp_ext.py
new file mode 100644
index 0000000..752bab9
--- /dev/null
+++ b/dist/lib/encodings/iso2022_jp_ext.py
@@ -0,0 +1,39 @@
+#
+# iso2022_jp_ext.py: Python Unicode Codec for ISO2022_JP_EXT
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_iso2022, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_iso2022.getcodec('iso2022_jp_ext')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='iso2022_jp_ext',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/iso2022_kr.py b/dist/lib/encodings/iso2022_kr.py
new file mode 100644
index 0000000..bf70187
--- /dev/null
+++ b/dist/lib/encodings/iso2022_kr.py
@@ -0,0 +1,39 @@
+#
+# iso2022_kr.py: Python Unicode Codec for ISO2022_KR
+#
+# Written by Hye-Shik Chang
+#
+
+import _codecs_iso2022, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_iso2022.getcodec('iso2022_kr')
+
+class Codec(codecs.Codec):
+ encode = codec.encode
+ decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+ codecs.IncrementalEncoder):
+ codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+ codecs.IncrementalDecoder):
+ codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+ codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+ codec = codec
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='iso2022_kr',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
diff --git a/dist/lib/encodings/iso8859_1.py b/dist/lib/encodings/iso8859_1.py
new file mode 100644
index 0000000..8cfc01f
--- /dev/null
+++ b/dist/lib/encodings/iso8859_1.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec iso8859_1 generated from 'MAPPINGS/ISO8859/8859-1.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='iso8859-1',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x04' # 0x04 -> END OF TRANSMISSION
+ '\x05' # 0x05 -> ENQUIRY
+ '\x06' # 0x06 -> ACKNOWLEDGE
+ '\x07' # 0x07 -> BELL
+ '\x08' # 0x08 -> BACKSPACE
+ '\t' # 0x09 -> HORIZONTAL TABULATION
+ '\n' # 0x0A -> LINE FEED
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x14' # 0x14 -> DEVICE CONTROL FOUR
+ '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x16 -> SYNCHRONOUS IDLE
+ '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x1a' # 0x1A -> SUBSTITUTE
+ '\x1b' # 0x1B -> ESCAPE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> DELETE
+ '\x80' # 0x80 ->
+ '\x81' # 0x81 ->
+ '\x82' # 0x82 ->
+ '\x83' # 0x83 ->
+ '\x84' # 0x84 ->
+ '\x85' # 0x85 ->
+ '\x86' # 0x86 ->
+ '\x87' # 0x87 ->
+ '\x88' # 0x88 ->
+ '\x89' # 0x89 ->
+ '\x8a' # 0x8A ->
+ '\x8b' # 0x8B ->
+ '\x8c' # 0x8C ->
+ '\x8d' # 0x8D ->
+ '\x8e' # 0x8E ->
+ '\x8f' # 0x8F ->
+ '\x90' # 0x90 ->
+ '\x91' # 0x91 ->
+ '\x92' # 0x92 ->
+ '\x93' # 0x93 ->
+ '\x94' # 0x94 ->
+ '\x95' # 0x95 ->
+ '\x96' # 0x96 ->
+ '\x97' # 0x97 ->
+ '\x98' # 0x98 ->
+ '\x99' # 0x99 ->
+ '\x9a' # 0x9A ->
+ '\x9b' # 0x9B ->
+ '\x9c' # 0x9C ->
+ '\x9d' # 0x9D ->
+ '\x9e' # 0x9E ->
+ '\x9f' # 0x9F ->
+ '\xa0' # 0xA0 -> NO-BREAK SPACE
+ '\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK
+ '\xa2' # 0xA2 -> CENT SIGN
+ '\xa3' # 0xA3 -> POUND SIGN
+ '\xa4' # 0xA4 -> CURRENCY SIGN
+ '\xa5' # 0xA5 -> YEN SIGN
+ '\xa6' # 0xA6 -> BROKEN BAR
+ '\xa7' # 0xA7 -> SECTION SIGN
+ '\xa8' # 0xA8 -> DIAERESIS
+ '\xa9' # 0xA9 -> COPYRIGHT SIGN
+ '\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR
+ '\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xac' # 0xAC -> NOT SIGN
+ '\xad' # 0xAD -> SOFT HYPHEN
+ '\xae' # 0xAE -> REGISTERED SIGN
+ '\xaf' # 0xAF -> MACRON
+ '\xb0' # 0xB0 -> DEGREE SIGN
+ '\xb1' # 0xB1 -> PLUS-MINUS SIGN
+ '\xb2' # 0xB2 -> SUPERSCRIPT TWO
+ '\xb3' # 0xB3 -> SUPERSCRIPT THREE
+ '\xb4' # 0xB4 -> ACUTE ACCENT
+ '\xb5' # 0xB5 -> MICRO SIGN
+ '\xb6' # 0xB6 -> PILCROW SIGN
+ '\xb7' # 0xB7 -> MIDDLE DOT
+ '\xb8' # 0xB8 -> CEDILLA
+ '\xb9' # 0xB9 -> SUPERSCRIPT ONE
+ '\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR
+ '\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ '\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER
+ '\xbd' # 0xBD -> VULGAR FRACTION ONE HALF
+ '\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS
+ '\xbf' # 0xBF -> INVERTED QUESTION MARK
+ '\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
+ '\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+ '\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+ '\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
+ '\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+ '\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+ '\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE
+ '\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+ '\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
+ '\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+ '\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+ '\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+ '\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
+ '\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+ '\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+ '\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
+ '\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic)
+ '\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
+ '\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
+ '\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+ '\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+ '\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
+ '\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+ '\xd7' # 0xD7 -> MULTIPLICATION SIGN
+ '\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
+ '\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
+ '\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+ '\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+ '\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+ '\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
+ '\xde' # 0xDE -> LATIN CAPITAL LETTER THORN (Icelandic)
+ '\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German)
+ '\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+ '\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+ '\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+ '\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE
+ '\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+ '\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+ '\xe6' # 0xE6 -> LATIN SMALL LETTER AE
+ '\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+ '\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+ '\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+ '\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+ '\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+ '\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE
+ '\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE
+ '\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+ '\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+ '\xf0' # 0xF0 -> LATIN SMALL LETTER ETH (Icelandic)
+ '\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE
+ '\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE
+ '\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+ '\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+ '\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE
+ '\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+ '\xf7' # 0xF7 -> DIVISION SIGN
+ '\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE
+ '\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+ '\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE
+ '\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+ '\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+ '\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE
+ '\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic)
+ '\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
+)
+
+### Encoding table
+encoding_table=codecs.charmap_build(decoding_table)
diff --git a/dist/lib/encodings/iso8859_10.py b/dist/lib/encodings/iso8859_10.py
new file mode 100644
index 0000000..b4fb041
--- /dev/null
+++ b/dist/lib/encodings/iso8859_10.py
@@ -0,0 +1,307 @@
+""" Python Character Mapping Codec iso8859_10 generated from 'MAPPINGS/ISO8859/8859-10.TXT' with gencodec.py.
+
+"""#"
+
+import codecs
+
+### Codec APIs
+
+class Codec(codecs.Codec):
+
+ def encode(self,input,errors='strict'):
+ return codecs.charmap_encode(input,errors,encoding_table)
+
+ def decode(self,input,errors='strict'):
+ return codecs.charmap_decode(input,errors,decoding_table)
+
+class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.charmap_encode(input,self.errors,encoding_table)[0]
+
+class IncrementalDecoder(codecs.IncrementalDecoder):
+ def decode(self, input, final=False):
+ return codecs.charmap_decode(input,self.errors,decoding_table)[0]
+
+class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+class StreamReader(Codec,codecs.StreamReader):
+ pass
+
+### encodings module API
+
+def getregentry():
+ return codecs.CodecInfo(
+ name='iso8859-10',
+ encode=Codec().encode,
+ decode=Codec().decode,
+ incrementalencoder=IncrementalEncoder,
+ incrementaldecoder=IncrementalDecoder,
+ streamreader=StreamReader,
+ streamwriter=StreamWriter,
+ )
+
+
+### Decoding Table
+
+decoding_table = (
+ '\x00' # 0x00 -> NULL
+ '\x01' # 0x01 -> START OF HEADING
+ '\x02' # 0x02 -> START OF TEXT
+ '\x03' # 0x03 -> END OF TEXT
+ '\x04' # 0x04 -> END OF TRANSMISSION
+ '\x05' # 0x05 -> ENQUIRY
+ '\x06' # 0x06 -> ACKNOWLEDGE
+ '\x07' # 0x07 -> BELL
+ '\x08' # 0x08 -> BACKSPACE
+ '\t' # 0x09 -> HORIZONTAL TABULATION
+ '\n' # 0x0A -> LINE FEED
+ '\x0b' # 0x0B -> VERTICAL TABULATION
+ '\x0c' # 0x0C -> FORM FEED
+ '\r' # 0x0D -> CARRIAGE RETURN
+ '\x0e' # 0x0E -> SHIFT OUT
+ '\x0f' # 0x0F -> SHIFT IN
+ '\x10' # 0x10 -> DATA LINK ESCAPE
+ '\x11' # 0x11 -> DEVICE CONTROL ONE
+ '\x12' # 0x12 -> DEVICE CONTROL TWO
+ '\x13' # 0x13 -> DEVICE CONTROL THREE
+ '\x14' # 0x14 -> DEVICE CONTROL FOUR
+ '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
+ '\x16' # 0x16 -> SYNCHRONOUS IDLE
+ '\x17' # 0x17 -> END OF TRANSMISSION BLOCK
+ '\x18' # 0x18 -> CANCEL
+ '\x19' # 0x19 -> END OF MEDIUM
+ '\x1a' # 0x1A -> SUBSTITUTE
+ '\x1b' # 0x1B -> ESCAPE
+ '\x1c' # 0x1C -> FILE SEPARATOR
+ '\x1d' # 0x1D -> GROUP SEPARATOR
+ '\x1e' # 0x1E -> RECORD SEPARATOR
+ '\x1f' # 0x1F -> UNIT SEPARATOR
+ ' ' # 0x20 -> SPACE
+ '!' # 0x21 -> EXCLAMATION MARK
+ '"' # 0x22 -> QUOTATION MARK
+ '#' # 0x23 -> NUMBER SIGN
+ '$' # 0x24 -> DOLLAR SIGN
+ '%' # 0x25 -> PERCENT SIGN
+ '&' # 0x26 -> AMPERSAND
+ "'" # 0x27 -> APOSTROPHE
+ '(' # 0x28 -> LEFT PARENTHESIS
+ ')' # 0x29 -> RIGHT PARENTHESIS
+ '*' # 0x2A -> ASTERISK
+ '+' # 0x2B -> PLUS SIGN
+ ',' # 0x2C -> COMMA
+ '-' # 0x2D -> HYPHEN-MINUS
+ '.' # 0x2E -> FULL STOP
+ '/' # 0x2F -> SOLIDUS
+ '0' # 0x30 -> DIGIT ZERO
+ '1' # 0x31 -> DIGIT ONE
+ '2' # 0x32 -> DIGIT TWO
+ '3' # 0x33 -> DIGIT THREE
+ '4' # 0x34 -> DIGIT FOUR
+ '5' # 0x35 -> DIGIT FIVE
+ '6' # 0x36 -> DIGIT SIX
+ '7' # 0x37 -> DIGIT SEVEN
+ '8' # 0x38 -> DIGIT EIGHT
+ '9' # 0x39 -> DIGIT NINE
+ ':' # 0x3A -> COLON
+ ';' # 0x3B -> SEMICOLON
+ '<' # 0x3C -> LESS-THAN SIGN
+ '=' # 0x3D -> EQUALS SIGN
+ '>' # 0x3E -> GREATER-THAN SIGN
+ '?' # 0x3F -> QUESTION MARK
+ '@' # 0x40 -> COMMERCIAL AT
+ 'A' # 0x41 -> LATIN CAPITAL LETTER A
+ 'B' # 0x42 -> LATIN CAPITAL LETTER B
+ 'C' # 0x43 -> LATIN CAPITAL LETTER C
+ 'D' # 0x44 -> LATIN CAPITAL LETTER D
+ 'E' # 0x45 -> LATIN CAPITAL LETTER E
+ 'F' # 0x46 -> LATIN CAPITAL LETTER F
+ 'G' # 0x47 -> LATIN CAPITAL LETTER G
+ 'H' # 0x48 -> LATIN CAPITAL LETTER H
+ 'I' # 0x49 -> LATIN CAPITAL LETTER I
+ 'J' # 0x4A -> LATIN CAPITAL LETTER J
+ 'K' # 0x4B -> LATIN CAPITAL LETTER K
+ 'L' # 0x4C -> LATIN CAPITAL LETTER L
+ 'M' # 0x4D -> LATIN CAPITAL LETTER M
+ 'N' # 0x4E -> LATIN CAPITAL LETTER N
+ 'O' # 0x4F -> LATIN CAPITAL LETTER O
+ 'P' # 0x50 -> LATIN CAPITAL LETTER P
+ 'Q' # 0x51 -> LATIN CAPITAL LETTER Q
+ 'R' # 0x52 -> LATIN CAPITAL LETTER R
+ 'S' # 0x53 -> LATIN CAPITAL LETTER S
+ 'T' # 0x54 -> LATIN CAPITAL LETTER T
+ 'U' # 0x55 -> LATIN CAPITAL LETTER U
+ 'V' # 0x56 -> LATIN CAPITAL LETTER V
+ 'W' # 0x57 -> LATIN CAPITAL LETTER W
+ 'X' # 0x58 -> LATIN CAPITAL LETTER X
+ 'Y' # 0x59 -> LATIN CAPITAL LETTER Y
+ 'Z' # 0x5A -> LATIN CAPITAL LETTER Z
+ '[' # 0x5B -> LEFT SQUARE BRACKET
+ '\\' # 0x5C -> REVERSE SOLIDUS
+ ']' # 0x5D -> RIGHT SQUARE BRACKET
+ '^' # 0x5E -> CIRCUMFLEX ACCENT
+ '_' # 0x5F -> LOW LINE
+ '`' # 0x60 -> GRAVE ACCENT
+ 'a' # 0x61 -> LATIN SMALL LETTER A
+ 'b' # 0x62 -> LATIN SMALL LETTER B
+ 'c' # 0x63 -> LATIN SMALL LETTER C
+ 'd' # 0x64 -> LATIN SMALL LETTER D
+ 'e' # 0x65 -> LATIN SMALL LETTER E
+ 'f' # 0x66 -> LATIN SMALL LETTER F
+ 'g' # 0x67 -> LATIN SMALL LETTER G
+ 'h' # 0x68 -> LATIN SMALL LETTER H
+ 'i' # 0x69 -> LATIN SMALL LETTER I
+ 'j' # 0x6A -> LATIN SMALL LETTER J
+ 'k' # 0x6B -> LATIN SMALL LETTER K
+ 'l' # 0x6C -> LATIN SMALL LETTER L
+ 'm' # 0x6D -> LATIN SMALL LETTER M
+ 'n' # 0x6E -> LATIN SMALL LETTER N
+ 'o' # 0x6F -> LATIN SMALL LETTER O
+ 'p' # 0x70 -> LATIN SMALL LETTER P
+ 'q' # 0x71 -> LATIN SMALL LETTER Q
+ 'r' # 0x72 -> LATIN SMALL LETTER R
+ 's' # 0x73 -> LATIN SMALL LETTER S
+ 't' # 0x74 -> LATIN SMALL LETTER T
+ 'u' # 0x75 -> LATIN SMALL LETTER U
+ 'v' # 0x76 -> LATIN SMALL LETTER V
+ 'w' # 0x77 -> LATIN SMALL LETTER W
+ 'x' # 0x78 -> LATIN SMALL LETTER X
+ 'y' # 0x79 -> LATIN SMALL LETTER Y
+ 'z' # 0x7A -> LATIN SMALL LETTER Z
+ '{' # 0x7B -> LEFT CURLY BRACKET
+ '|' # 0x7C -> VERTICAL LINE
+ '}' # 0x7D -> RIGHT CURLY BRACKET
+ '~' # 0x7E -> TILDE
+ '\x7f' # 0x7F -> DELETE
+ '\x80' # 0x80 ->
+ '\x81' # 0x81 ->
+ '\x82' # 0x82 ->
+ '\x83' # 0x83 ->