* Add upip dependencies
* Add firmware updater * Add urlencode lib * Add tests for http library * Support form encoding in http postdont-delete-test-download-branch
parent
992d00dc0f
commit
1fd7f5c0db
|
@ -6,6 +6,7 @@ _pyb = None
|
|||
def get_pyb(args):
|
||||
global _pyb
|
||||
if not _pyb:
|
||||
print("Connected to badge:", end="")
|
||||
if not args.device:
|
||||
args.device = find_tty()
|
||||
|
||||
|
@ -13,10 +14,10 @@ def get_pyb(args):
|
|||
try:
|
||||
_pyb = Pyboard(args.device, args.baudrate, None, None, args.wait)
|
||||
except PyboardError as er:
|
||||
print(" FAIL")
|
||||
print(er)
|
||||
sys.exit(1)
|
||||
print("Connected to badge.")
|
||||
|
||||
print(" DONE")
|
||||
return _pyb
|
||||
|
||||
def close_pyb():
|
||||
|
@ -24,10 +25,13 @@ def close_pyb():
|
|||
if _pyb:
|
||||
_pyb.close()
|
||||
|
||||
def stop_badge(args):
|
||||
def stop_badge(args, verbose):
|
||||
pyb = get_pyb(args)
|
||||
print("stopping running app")
|
||||
if verbose:
|
||||
print("Stopping running app:", end="")
|
||||
write_command(pyb, b'\r\x03\x03') # ctrl-C twice: interrupt any running program
|
||||
if verbose:
|
||||
print(" DONE")
|
||||
|
||||
def write_command(pyb, command):
|
||||
flush_input(pyb)
|
||||
|
@ -42,14 +46,15 @@ def flush_input(pyb):
|
|||
|
||||
def soft_reset(args):
|
||||
pyb = get_pyb(args)
|
||||
print("trying to soft reboot badge")
|
||||
print("Soft reboot:", end="")
|
||||
write_command(pyb, b'\x04') # ctrl-D: soft reset
|
||||
#print("1")
|
||||
data = pyb.read_until(1, b'soft reboot\r\n')
|
||||
#print("2")
|
||||
if data.endswith(b'soft reboot\r\n'):
|
||||
print("Soft reboot was successful.")
|
||||
print(" DONE")
|
||||
else:
|
||||
print(" FAIL")
|
||||
raise PyboardError('could not soft reboot')
|
||||
|
||||
def find_tty():
|
||||
|
@ -60,10 +65,17 @@ def find_tty():
|
|||
print("Couldn't find badge tty - Please make it's plugged in and reset it if necessary")
|
||||
sys.exit(1)
|
||||
|
||||
def check_run(args):
|
||||
if args.command is not None or len(args.paths):
|
||||
for filename in args.paths:
|
||||
with open(filename, 'r') as f:
|
||||
pyfile = f.read()
|
||||
compile(pyfile + '\n', filename, 'exec')
|
||||
|
||||
def run(args):
|
||||
pyb = get_pyb(args)
|
||||
print("executing %s" % args.paths)
|
||||
print("----------------")
|
||||
|
||||
print("Preparing execution:", end="")
|
||||
# run any command or file(s) - this is mostly a copy from pyboard.py
|
||||
if args.command is not None or len(args.paths):
|
||||
# we must enter raw-REPL mode to execute commands
|
||||
|
@ -71,9 +83,11 @@ def run(args):
|
|||
try:
|
||||
pyb.enter_raw_repl()
|
||||
except PyboardError as er:
|
||||
print(" FAIL")
|
||||
print(er)
|
||||
pyb.close()
|
||||
sys.exit(1)
|
||||
print(" DONE")
|
||||
|
||||
def execbuffer(buf):
|
||||
try:
|
||||
|
@ -93,6 +107,7 @@ def run(args):
|
|||
# run any files
|
||||
for filename in args.paths:
|
||||
with open(filename, 'rb') as f:
|
||||
print("-------- %s --------" % filename)
|
||||
pyfile = f.read()
|
||||
execbuffer(pyfile)
|
||||
|
||||
|
|
|
@ -0,0 +1,543 @@
|
|||
#!/usr/bin/env python
|
||||
# This file is part of the OpenMV project.
|
||||
# Copyright (c) 2013/2014 Ibrahim Abdelkader <i.abdalkader@gmail.com>
|
||||
# This work is licensed under the MIT license, see the file LICENSE for
|
||||
# details.
|
||||
|
||||
"""This module implements enough functionality to program the STM32F4xx over
|
||||
DFU, without requiring dfu-util.
|
||||
See app note AN3156 for a description of the DFU protocol.
|
||||
See document UM0391 for a description of the DFuse file.
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import struct
|
||||
import sys
|
||||
import usb.core
|
||||
import usb.util
|
||||
import zlib
|
||||
|
||||
# VID/PID
|
||||
__VID = 0x0483
|
||||
__PID = 0xdf11
|
||||
|
||||
# USB request __TIMEOUT
|
||||
__TIMEOUT = 4000
|
||||
|
||||
# DFU commands
|
||||
__DFU_DETACH = 0
|
||||
__DFU_DNLOAD = 1
|
||||
__DFU_UPLOAD = 2
|
||||
__DFU_GETSTATUS = 3
|
||||
__DFU_CLRSTATUS = 4
|
||||
__DFU_GETSTATE = 5
|
||||
__DFU_ABORT = 6
|
||||
|
||||
# DFU status
|
||||
__DFU_STATE_APP_IDLE = 0x00
|
||||
__DFU_STATE_APP_DETACH = 0x01
|
||||
__DFU_STATE_DFU_IDLE = 0x02
|
||||
__DFU_STATE_DFU_DOWNLOAD_SYNC = 0x03
|
||||
__DFU_STATE_DFU_DOWNLOAD_BUSY = 0x04
|
||||
__DFU_STATE_DFU_DOWNLOAD_IDLE = 0x05
|
||||
__DFU_STATE_DFU_MANIFEST_SYNC = 0x06
|
||||
__DFU_STATE_DFU_MANIFEST = 0x07
|
||||
__DFU_STATE_DFU_MANIFEST_WAIT_RESET = 0x08
|
||||
__DFU_STATE_DFU_UPLOAD_IDLE = 0x09
|
||||
__DFU_STATE_DFU_ERROR = 0x0a
|
||||
|
||||
_DFU_DESCRIPTOR_TYPE = 0x21
|
||||
|
||||
|
||||
# USB device handle
|
||||
__dev = None
|
||||
|
||||
__verbose = None
|
||||
|
||||
# USB DFU interface
|
||||
__DFU_INTERFACE = 0
|
||||
|
||||
import inspect
# PyUSB changed the signature of usb.util.get_string between 1.0.0.b1 and
# 1.0.0.b2 (the explicit `length` argument was dropped).  Pick a matching
# wrapper at import time so callers never need to know which version is
# installed.
if 'length' in inspect.getfullargspec(usb.util.get_string).args:
    # PyUSB 1.0.0.b1 has the length argument
    def get_string(dev, index):
        """Return USB string descriptor `index` from device `dev`."""
        return usb.util.get_string(dev, 255, index)
else:
    # PyUSB 1.0.0.b2 dropped the length argument
    def get_string(dev, index):
        """Return USB string descriptor `index` from device `dev`."""
        return usb.util.get_string(dev, index)
|
||||
|
||||
|
||||
def init():
    """Initializes the found DFU device so that we can program it.

    Expects exactly one DFU device matching __VID/__PID to be attached.

    Raises:
        ValueError: if no device, or more than one device, is found.
    """
    global __dev
    devices = get_dfu_devices(idVendor=__VID, idProduct=__PID)
    if not devices:
        raise ValueError('No DFU device found')
    if len(devices) > 1:
        raise ValueError("Multiple DFU devices found")
    __dev = devices[0]
    __dev.set_configuration()

    # Claim DFU interface
    usb.util.claim_interface(__dev, __DFU_INTERFACE)

    # Clear status (perhaps left over from a previous, aborted session)
    clr_status()
|
||||
|
||||
|
||||
def clr_status():
    """Clears any error status (perhaps left over from a previous session)."""
    # DFU_CLRSTATUS; bmRequestType 0x21 = host-to-device, class, interface.
    __dev.ctrl_transfer(0x21, __DFU_CLRSTATUS, 0, __DFU_INTERFACE,
                        None, __TIMEOUT)
|
||||
|
||||
|
||||
def get_status():
    """Get the status of the last operation.

    Returns the bState field (byte 4) of the 6-byte DFU_GETSTATUS response,
    i.e. one of the __DFU_STATE_* constants.
    """
    # bmRequestType 0xA1 = device-to-host, class, interface.
    # NOTE(review): uses a hard-coded 20s timeout instead of __TIMEOUT —
    # presumably because erase operations can be slow; confirm.
    stat = __dev.ctrl_transfer(0xA1, __DFU_GETSTATUS, 0, __DFU_INTERFACE,
                               6, 20000)
    # print (__DFU_STAT[stat[4]], stat)
    return stat[4]
|
||||
|
||||
|
||||
def mass_erase():
    """Performs a MASS erase (i.e. erases the entire device).

    Raises:
        Exception: if the bootloader does not report the expected DFU
            download states after the erase request.
    """
    # Send DNLOAD with first byte=0x41 (DfuSe erase command; no address
    # means mass erase).  The payload must be a bytes object: PyUSB on
    # Python 3 rejects a str payload, so the previous "\x41" literal would
    # raise a TypeError before reaching the device.
    __dev.ctrl_transfer(0x21, __DFU_DNLOAD, 0, __DFU_INTERFACE,
                        b"\x41", __TIMEOUT)

    # Execute last command (polling GETSTATUS triggers execution)
    if get_status() != __DFU_STATE_DFU_DOWNLOAD_BUSY:
        raise Exception("DFU: erase failed")

    # Check command state
    if get_status() != __DFU_STATE_DFU_DOWNLOAD_IDLE:
        raise Exception("DFU: erase failed")
|
||||
|
||||
|
||||
def page_erase(addr):
    """Erases a single flash page containing address `addr`."""
    if __verbose:
        print("Erasing page: 0x%x..." % (addr))

    # Send DNLOAD with first byte=0x41 and page address
    buf = struct.pack("<BI", 0x41, addr)
    __dev.ctrl_transfer(0x21, __DFU_DNLOAD, 0, __DFU_INTERFACE, buf, __TIMEOUT)

    # Execute last command (polling GETSTATUS triggers execution)
    if get_status() != __DFU_STATE_DFU_DOWNLOAD_BUSY:
        raise Exception("DFU: erase failed")

    # Check command state
    if get_status() != __DFU_STATE_DFU_DOWNLOAD_IDLE:
        raise Exception("DFU: erase failed")
|
||||
|
||||
|
||||
def set_address(addr):
    """Sets the address for the next operation."""
    # Send DNLOAD with first byte=0x21 and page address
    buf = struct.pack("<BI", 0x21, addr)
    __dev.ctrl_transfer(0x21, __DFU_DNLOAD, 0, __DFU_INTERFACE, buf, __TIMEOUT)

    # Execute last command (polling GETSTATUS triggers execution)
    if get_status() != __DFU_STATE_DFU_DOWNLOAD_BUSY:
        raise Exception("DFU: set address failed")

    # Check command state
    if get_status() != __DFU_STATE_DFU_DOWNLOAD_IDLE:
        raise Exception("DFU: set address failed")
|
||||
|
||||
|
||||
def write_memory(addr, buf, progress=None, progress_addr=0, progress_size=0):
    """Writes a buffer into memory. This routine assumes that memory has
    already been erased.

    addr          - start address to write to
    buf           - bytes to write
    progress      - optional callback(progress_addr, offset, progress_size)
    progress_addr - base address reported to the progress callback
    progress_size - total size reported to the progress callback
    """

    xfer_count = 0
    xfer_bytes = 0
    xfer_total = len(buf)
    xfer_base = addr

    while xfer_bytes < xfer_total:
        if __verbose and xfer_count % 512 == 0:
            print ("Addr 0x%x %dKBs/%dKBs..." % (xfer_base + xfer_bytes,
                                                 xfer_bytes // 1024,
                                                 xfer_total // 1024))
        # Only report every other chunk to keep callback overhead low.
        if progress and xfer_count % 2 == 0:
            progress(progress_addr, xfer_base + xfer_bytes - progress_addr,
                     progress_size)

        # Set mem write address
        set_address(xfer_base+xfer_bytes)

        # Send DNLOAD with fw data
        # the "2048" is the DFU transfer size supported by the ST DFU bootloader
        # TODO: this number should be extracted from the USB config descriptor
        chunk = min(2048, xfer_total-xfer_bytes)
        __dev.ctrl_transfer(0x21, __DFU_DNLOAD, 2, __DFU_INTERFACE,
                            buf[xfer_bytes:xfer_bytes + chunk], __TIMEOUT)

        # Execute last command (polling GETSTATUS triggers execution)
        if get_status() != __DFU_STATE_DFU_DOWNLOAD_BUSY:
            raise Exception("DFU: write memory failed")

        # Check command state
        if get_status() != __DFU_STATE_DFU_DOWNLOAD_IDLE:
            raise Exception("DFU: write memory failed")

        xfer_count += 1
        xfer_bytes += chunk
|
||||
|
||||
|
||||
def write_page(buf, xfer_offset):
    """Writes a single page. This routine assumes that memory has already
    been erased.

    buf         - page-sized bytes to write
    xfer_offset - offset from the start of flash (0x08000000)
    """

    xfer_base = 0x08000000

    # Set mem write address
    set_address(xfer_base+xfer_offset)

    # Send DNLOAD with fw data
    __dev.ctrl_transfer(0x21, __DFU_DNLOAD, 2, __DFU_INTERFACE, buf, __TIMEOUT)

    # Execute last command (polling GETSTATUS triggers execution)
    if get_status() != __DFU_STATE_DFU_DOWNLOAD_BUSY:
        raise Exception("DFU: write memory failed")

    # Check command state
    if get_status() != __DFU_STATE_DFU_DOWNLOAD_IDLE:
        raise Exception("DFU: write memory failed")

    if __verbose:
        print ("Write: 0x%x " % (xfer_base + xfer_offset))
|
||||
|
||||
|
||||
def exit_dfu():
    """Exit DFU mode, and start running the program.

    Sets the jump address, then issues a zero-length DNLOAD, which tells the
    bootloader to leave DFU mode and start the application.
    """

    # set jump address
    set_address(0x08000000)

    # Send DNLOAD with 0 length to exit DFU
    __dev.ctrl_transfer(0x21, __DFU_DNLOAD, 0, __DFU_INTERFACE,
                        None, __TIMEOUT)

    try:
        # Execute last command
        if get_status() != __DFU_STATE_DFU_MANIFEST:
            print("Failed to reset device")
    except Exception:
        # The device frequently drops off the bus while rebooting, making the
        # status read fail; that is expected, so this stays best-effort.
        # (Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are not swallowed.)
        pass
    finally:
        # Release device resources even when the status read above failed —
        # previously this was skipped on exception.
        usb.util.dispose_resources(__dev)
|
||||
|
||||
|
||||
def named(values, names):
    """Creates a dict with `names` as fields, and `values` as values."""
    field_names = names.split()
    return {field: value for field, value in zip(field_names, values)}
|
||||
|
||||
|
||||
def consume(fmt, data, names):
    """Parses the struct defined by `fmt` from `data`, stores the parsed
    fields into a dict using `names`. Returns the dict, and the data
    with the struct stripped off."""
    head_len = struct.calcsize(fmt)
    head, tail = data[:head_len], data[head_len:]
    fields = named(struct.unpack(fmt, head), names)
    return fields, tail
|
||||
|
||||
|
||||
def cstring(string):
    """Extracts a null-terminated string from a byte array."""
    text = string.decode('utf-8')
    terminator = text.find('\0')
    return text if terminator < 0 else text[:terminator]
|
||||
|
||||
|
||||
def compute_crc(data):
    """Computes the CRC32 value for the data passed in.

    This is the DFU-suffix flavour: the bitwise complement of the standard
    zlib CRC32, masked to 32 bits.
    """
    return (zlib.crc32(data) ^ 0xFFFFFFFF) & 0xFFFFFFFF
|
||||
|
||||
|
||||
def read_dfu_file(filename):
    """Reads a DFU file, and parses the individual elements from the file.
    Returns an array of elements. Each element is a dictionary with the
    following keys:
        num - The element index
        address - The address that the element data should be written to.
        size - The size of the element data.
        data - The element data.
    If an error occurs while parsing the file, then None is returned.
    """

    print("File: {}".format(filename))
    with open(filename, 'rb') as fin:
        data = fin.read()
    # CRC covers everything except the final 4 suffix bytes (the stored CRC).
    crc = compute_crc(data[:-4])
    elements = []

    # Decode the DFU Prefix
    #
    # <5sBIB
    # < little endian
    # 5s char[5] signature "DfuSe"
    # B uint8_t version 1
    # I uint32_t size Size of the DFU file (not including suffix)
    # B uint8_t targets Number of targets
    dfu_prefix, data = consume('<5sBIB', data,
                               'signature version size targets')
    print (" %(signature)s v%(version)d, image size: %(size)d, "
           "targets: %(targets)d" % dfu_prefix)
    for target_idx in range(dfu_prefix['targets']):
        # Decode the Image Prefix
        #
        # <6sBI255s2I
        # < little endian
        # 6s char[6] signature "Target"
        # B uint8_t altsetting
        # I uint32_t named bool indicating if a name was used
        # 255s char[255] name name of the target
        # I uint32_t size size of image (not incl prefix)
        # I uint32_t elements Number of elements in the image
        img_prefix, data = consume('<6sBI255s2I', data,
                                   'signature altsetting named name '
                                   'size elements')
        img_prefix['num'] = target_idx
        if img_prefix['named']:
            img_prefix['name'] = cstring(img_prefix['name'])
        else:
            img_prefix['name'] = ''
        print(' %(signature)s %(num)d, alt setting: %(altsetting)s, '
              'name: "%(name)s", size: %(size)d, elements: %(elements)d'
              % img_prefix)

        # Split off this target's data, then walk its elements.
        target_size = img_prefix['size']
        target_data, data = data[:target_size], data[target_size:]
        for elem_idx in range(img_prefix['elements']):
            # Decode target prefix
            # < little endian
            # I uint32_t element address
            # I uint32_t element size
            elem_prefix, target_data = consume('<2I', target_data, 'addr size')
            elem_prefix['num'] = elem_idx
            print(' %(num)d, address: 0x%(addr)08x, size: %(size)d'
                  % elem_prefix)
            elem_size = elem_prefix['size']
            elem_data = target_data[:elem_size]
            target_data = target_data[elem_size:]
            elem_prefix['data'] = elem_data
            elements.append(elem_prefix)

        # Anything left over means the element sizes didn't add up.
        if len(target_data):
            print("target %d PARSE ERROR" % target_idx)

    # Decode DFU Suffix
    # < little endian
    # H uint16_t device Firmware version
    # H uint16_t product
    # H uint16_t vendor
    # H uint16_t dfu 0x11a (DFU file format version)
    # 3s char[3] ufd 'UFD'
    # B uint8_t len 16
    # I uint32_t crc32
    dfu_suffix = named(struct.unpack('<4H3sBI', data[:16]),
                       'device product vendor dfu ufd len crc')
    print (' usb: %(vendor)04x:%(product)04x, device: 0x%(device)04x, '
           'dfu: 0x%(dfu)04x, %(ufd)s, %(len)d, 0x%(crc)08x' % dfu_suffix)
    if crc != dfu_suffix['crc']:
        print("CRC ERROR: computed crc32 is 0x%08x" % crc)
        return
    data = data[16:]
    if data:
        print("PARSE ERROR")
        return

    return elements
|
||||
|
||||
|
||||
class FilterDFU(object):
    """Class for filtering USB devices to identify devices which are in DFU
    mode.
    """

    def __call__(self, device):
        # A DFU interface has bInterfaceClass 0xFE (Application Specific)
        # and bInterfaceSubClass 1 (Device Firmware Upgrade).
        # NOTE(review): the unconditional return means only the FIRST
        # interface of the FIRST configuration is inspected — a device whose
        # DFU interface is not first would be missed; confirm this is
        # intended.
        for cfg in device:
            for intf in cfg:
                return (intf.bInterfaceClass == 0xFE and
                        intf.bInterfaceSubClass == 1)
|
||||
|
||||
|
||||
def get_dfu_devices(*args, **kwargs):
    """Returns a list of USB device which are currently in DFU mode.
    Additional filters (like idProduct and idVendor) can be passed in to
    refine the search.
    """
    matches = usb.core.find(*args, find_all=True,
                            custom_match=FilterDFU(), **kwargs)
    # convert to list for compatibility with newer pyusb
    return list(matches)
|
||||
|
||||
|
||||
def get_memory_layout(device):
    """Returns an array which identifies the memory layout. Each entry
    of the array will contain a dictionary with the following keys:
        addr - Address of this memory segment
        last_addr - Last address contained within the memory segment.
        size - size of the segment, in bytes
        num_pages - number of pages in the segment
        page_size - size of each page, in bytes
    """
    cfg = device[0]
    intf = cfg[(0, 0)]
    # The ST bootloader encodes the flash layout in the DFU interface's
    # string descriptor, alternating names and segment descriptions
    # separated by '/'.
    mem_layout_str = get_string(device, intf.iInterface)
    mem_layout = mem_layout_str.split('/')
    result = []
    for mem_layout_index in range(1, len(mem_layout), 2):
        addr = int(mem_layout[mem_layout_index], 0)
        segments = mem_layout[mem_layout_index + 1].split(',')
        # segment format: <count>*<size><multiplier><access flag>
        seg_re = re.compile(r'(\d+)\*(\d+)(.)(.)')
        for segment in segments:
            seg_match = seg_re.match(segment)
            num_pages = int(seg_match.groups()[0], 10)
            page_size = int(seg_match.groups()[1], 10)
            multiplier = seg_match.groups()[2]
            if multiplier == 'K':
                page_size *= 1024
            if multiplier == 'M':
                page_size *= 1024 * 1024
            size = num_pages * page_size
            last_addr = addr + size - 1
            result.append(named((addr, last_addr, size, num_pages, page_size),
                                "addr last_addr size num_pages page_size"))
            # Segments within one region are contiguous.
            addr += size
    return result
|
||||
|
||||
|
||||
def list_dfu_devices(*args, **kwargs):
    """Prints a list of devices detected in DFU mode."""
    devices = get_dfu_devices(*args, **kwargs)
    if not devices:
        print("No DFU capable devices found")
        return
    for device in devices:
        print("Bus {} Device {:03d}: ID {:04x}:{:04x}"
              .format(device.bus, device.address,
                      device.idVendor, device.idProduct))
        layout = get_memory_layout(device)
        print("Memory Layout")
        for entry in layout:
            print("    0x{:x} {:2d} pages of {:3d}K bytes"
                  .format(entry['addr'], entry['num_pages'],
                          entry['page_size'] // 1024))
|
||||
|
||||
|
||||
def write_elements(elements, mass_erase_used, progress=None):
    """Writes the indicated elements into the target memory,
    erasing as needed.

    elements        - list of dicts as returned by read_dfu_file()
    mass_erase_used - True if the whole chip was already erased, in which
                      case the per-page erase is skipped
    progress        - optional callback(elem_addr, offset, elem_size)
    """

    mem_layout = get_memory_layout(__dev)
    for elem in elements:
        addr = elem['addr']
        size = elem['size']
        data = elem['data']
        elem_size = size
        elem_addr = addr
        if progress:
            progress(elem_addr, 0, elem_size)
        while size > 0:
            write_size = size
            if not mass_erase_used:
                for segment in mem_layout:
                    if addr >= segment['addr'] and \
                       addr <= segment['last_addr']:
                        # We found the page containing the address we want to
                        # write, erase it
                        page_size = segment['page_size']
                        page_addr = addr & ~(page_size - 1)
                        # Clamp the write so it never crosses a page boundary.
                        if addr + write_size > page_addr + page_size:
                            write_size = page_addr + page_size - addr
                        page_erase(page_addr)
                        break
            write_memory(addr, data[:write_size], progress,
                         elem_addr, elem_size)
            data = data[write_size:]
            addr += write_size
            size -= write_size
            if progress:
                progress(elem_addr, addr - elem_addr, elem_size)
|
||||
|
||||
|
||||
def cli_progress(addr, offset, size):
    """Prints a progress report suitable for use on the command line.

    Redraws a single line ("\\r" prefix) showing address, total size, a
    25-character bar and a percentage.
    """
    width = 25
    filled = offset * width // size
    gap = width - filled
    percent = offset * 100 // size
    print("\r0x{:08x} {:7d} [{}{}] {:3d}% "
          .format(addr, size, '=' * filled, ' ' * gap, percent), end="")
    sys.stdout.flush()
    if offset == size:
        # Transfer complete — finish the line.
        print("")
|
||||
|
||||
|
||||
def main():
    """Test program for verifying this files functionality.

    Command-line entry point: --list prints DFU devices, --mass-erase wipes
    the chip, and a path given via -u flashes that DFU file.
    """
    global __verbose
    # Parse CMD args
    parser = argparse.ArgumentParser(description='DFU Python Util')
    #parser.add_argument("path", help="file path")
    parser.add_argument(
        "-l", "--list",
        help="list available DFU devices",
        action="store_true",
        default=False
    )
    parser.add_argument(
        "-m", "--mass-erase",
        help="mass erase device",
        action="store_true",
        default=False
    )
    # NOTE(review): despite the "--upload" name/help text, args.path is
    # consumed below by read_dfu_file/write_elements, i.e. it WRITES the file
    # to the device — confirm the intended wording.
    parser.add_argument(
        "-u", "--upload",
        help="read file from DFU device",
        dest="path",
        default=False
    )
    parser.add_argument(
        "-v", "--verbose",
        help="increase output verbosity",
        action="store_true",
        default=False
    )
    args = parser.parse_args()

    __verbose = args.verbose

    if args.list:
        list_dfu_devices(idVendor=__VID, idProduct=__PID)
        return

    init()

    if args.mass_erase:
        print ("Mass erase...")
        mass_erase()

    if args.path:
        elements = read_dfu_file(args.path)
        if not elements:
            return
        print("Writing memory...")
        write_elements(elements, args.mass_erase, progress=cli_progress)

        print("Exiting DFU...")
        exit_dfu()
        return

    print("No command specified")
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -0,0 +1,49 @@
|
|||
from pydfu import *
|
||||
import urllib.request, tempfile, os, shutil, ssl
|
||||
|
||||
def firmware_update(verbose):
    """Download the latest TiLDA Mk4 firmware and flash it to a badge that
    is in DFU mode.

    verbose - when True, progress detail is requested from the DFU layer.

    Raises nothing for a missing badge (handled and reported); network and
    USB errors other than ValueError propagate to the caller.
    """
    global __verbose
    # NOTE(review): pydfu keeps its own module-level __verbose; assigning the
    # global here (after `from pydfu import *`) only updates this module's
    # copy — verify verbosity actually reaches pydfu's functions.
    __verbose = verbose

    # mkstemp instead of the deprecated, race-prone mktemp: the file is
    # created atomically with a unique name, so no other process can hijack
    # the path between name generation and open().
    fd, temp_path = tempfile.mkstemp("firmware.dfu")
    os.close(fd)
    url = "https://update.badge.emfcamp.org/firmware.dfu"

    print("Hello - Welcome to the automated TiLDA Mk4 firmware updater")
    print("Finding badge: ", end="")
    try:
        init()
        print("DONE")

        print("Downloading newest firmware: ", end="")
        # Certificate validation is skipped deliberately; integrity of the
        # image is still checked via the DFU file's CRC32.
        context = ssl._create_unverified_context()
        with urllib.request.urlopen(url, context=context) as response:
            with open(temp_path, 'wb') as tmp_file:
                shutil.copyfileobj(response, tmp_file)
        print("DONE")

        elements = read_dfu_file(temp_path)
        if not elements:
            return

        print("Resetting Badge: ", end="")
        mass_erase()
        print("DONE")

        print("Updating...")
        write_elements(elements, True, progress=cli_progress)
        exit_dfu()

        print("")
        print("You can now restart your badge by pressing the reset button on the back. Please follow the instructions on the screen to finish the setup")
        print("Have a nice day!")

    except ValueError as e:
        # init() raises ValueError when no (or multiple) DFU devices exist.
        print("FAIL")
        print("")
        print("We couldn't find your badge. You need to make sure it's plugged in and in DFU mode.")
        print("To put your badge into DFU mode you need to press the joystick in the middle while pressing the reset button at the back.")
        print("After that, please try this script again.")
        print()
        print("Error: %s" %(e))
    finally:
        # Always remove the downloaded image, even on failure.
        if os.path.isfile(temp_path): os.remove(temp_path)
|
|
@ -30,8 +30,9 @@ This module has the following operations:
|
|||
resources = get_resources(path) # Gets resources for a given path
|
||||
add_hashes(path, resources) # Adds hashes to the file dict - not needed for testing
|
||||
add_metadata(path, resources) # Adds metadata
|
||||
resolve_dependencies(resources) # Merges all dependencies into each resource's file dict
|
||||
validate(resources) # Runs basic validation
|
||||
resolve_dependencies(resources) # Merges all dependencies into each resource's file dict
|
||||
remove_upip(resources) # Remove upip resources from dict again
|
||||
|
||||
This module encapsulates all the main operations the app library is expect to
|
||||
perform on a given checkout. It's intentionally kept in one file to make it easier
|
||||
|
@ -72,14 +73,30 @@ def get_resources(path):
|
|||
if sub_path.startswith(".") or sub_path == "__pycache__":
|
||||
continue
|
||||
full_path = os.path.join(path, sub_path)
|
||||
if os.path.islink(full_path):
|
||||
continue
|
||||
if os.path.isfile(full_path):
|
||||
result[sub_path] = {"type": "root", "files": {sub_path: None}}
|
||||
continue
|
||||
files = _scan_files(full_path, sub_path)
|
||||
if sub_path in ["lib", "shared"]:
|
||||
files = _scan_files(full_path, sub_path)
|
||||
for rel_path in files:
|
||||
result[rel_path] = {"type": sub_path, "files": {rel_path: None}}
|
||||
elif sub_path == "upip":
|
||||
for upip_lib in os.listdir(full_path):
|
||||
if upip_lib.startswith(".") or upip_lib == "__pycache__":
|
||||
continue
|
||||
full_lib_path = os.path.join(full_path, upip_lib)
|
||||
files = {}
|
||||
if os.path.isfile(full_lib_path):
|
||||
files = {full_lib_path: None}
|
||||
upip_lib = upip_lib.rsplit('.', 1)[0]
|
||||
else:
|
||||
for rel_path in _scan_files(full_lib_path, os.path.join(sub_path, upip_lib)):
|
||||
files[rel_path] = None
|
||||
result["upip:%s" % upip_lib] = {"type": sub_path, "files": files}
|
||||
else:
|
||||
files = _scan_files(full_path, sub_path)
|
||||
result[sub_path] = {"type": "app", "files": {}}
|
||||
for rel_path in files:
|
||||
result[sub_path]["files"][rel_path] = None
|
||||
|
@ -131,16 +148,10 @@ def add_metadata(path, resources):
|
|||
def _normalize_metadata(metadata):
|
||||
metadata['description'] = metadata.pop('doc')
|
||||
if 'dependencies' in metadata:
|
||||
metadata['dependencies'] = [_normalize_lib(l) for l in metadata.pop('dependencies')]
|
||||
metadata['dependencies'] = [normalize_dependency(l) for l in metadata.pop('dependencies')]
|
||||
|
||||
return metadata
|
||||
|
||||
def _normalize_lib(lib):
|
||||
"""lib dependencies can be shortened to just their module name"""
|
||||
if "." in lib or "/" in lib:
|
||||
return lib
|
||||
return "lib/%s.py" % lib
|
||||
|
||||
"""
|
||||
resolve_dependencies(resources)
|
||||
|
||||
|
@ -195,6 +206,21 @@ def _validate_resource(path, resource):
|
|||
if 'categories' not in resource or (not isinstance(resource['categories'], list)) or len(resource['categories']) == 0:
|
||||
resource.setdefault("errors", []).append("___categories___ list is required in main.py but not found")
|
||||
|
||||
|
||||
"""
|
||||
remove_upip(resources)
|
||||
|
||||
upip adds over 100 resources to the list. Some of them have broken validation as well, so it's
|
||||
useful to remove them after resolving dependencies.
|
||||
"""
|
||||
def remove_upip(resources):
    """Remove every resource of type "upip" from `resources` in place."""
    upip_keys = [key for key, resource in resources.items()
                 if resource['type'] == "upip"]
    for key in upip_keys:
        del resources[key]
|
||||
|
||||
"""
|
||||
helpers
|
||||
"""
|
||||
|
@ -209,3 +235,12 @@ def get_error_summary(resources):
|
|||
summary += "\n"
|
||||
return summary.strip()
|
||||
|
||||
def pretty_print_resources(resources):
    """Render the resources dict as human-readable, indented JSON."""
    import json
    rendered = json.dumps(resources, indent=4)
    return rendered
|
||||
|
||||
def normalize_dependency(dependency):
    """lib dependencies can be shortened to just their module name"""
    is_explicit = ("." in dependency or "/" in dependency
                   or "upip:" in dependency)
    if is_explicit:
        return dependency
    return "lib/%s.py" % dependency
|
||||
|
|
|
@ -1,30 +1,46 @@
|
|||
import os, glob, shutil, sys
|
||||
import os, shutil, sys, fnmatch
|
||||
|
||||
def sync(storage, patterns):
|
||||
def sync(storage, patterns, resources, verbose):
|
||||
root = get_root()
|
||||
|
||||
# Add all paths that are already files
|
||||
paths = [os.path.join(root, p) for p in (patterns or []) if os.path.isfile(os.path.join(root, p))]
|
||||
paths = set([p for p in (patterns or []) if os.path.isfile(os.path.join(root, p))])
|
||||
|
||||
if patterns:
|
||||
new_patterns = []
|
||||
patterns = [os.path.join(root, p, "**") for p in patterns]
|
||||
else:
|
||||
patterns = ["**/**", "boot.py"]
|
||||
# Always copy boot.py
|
||||
paths.add("boot.py")
|
||||
|
||||
# wifi.json
|
||||
wifi_path = os.path.join(root, "wifi.json")
|
||||
if os.path.isfile(wifi_path):
|
||||
paths.add(wifi_path)
|
||||
|
||||
if not patterns:
|
||||
patterns = ["*"]
|
||||
|
||||
for pattern in patterns:
|
||||
for path in glob.glob(pattern):
|
||||
paths.append(path)
|
||||
|
||||
if len(paths) == 0:
|
||||
print("No files to copy found for pattern %s" % patterns)
|
||||
sys.exit(1)
|
||||
found = False
|
||||
for key, resource in resources.items():
|
||||
if fnmatch.fnmatch(key, pattern):
|
||||
found = True
|
||||
if verbose:
|
||||
print("Resource %s is going to be synced" % key)
|
||||
for path in resource['files'].keys():
|
||||
paths.add(path)
|
||||
if not found:
|
||||
print("WARN: No resources to copy found for pattern %s" % patterns)
|
||||
|
||||
if not verbose:
|
||||
print("Copying %s files: " % len(paths), end="")
|
||||
for path in paths:
|
||||
rel_path = os.path.relpath(path, root)
|
||||
if rel_path.startswith("."):
|
||||
if not path:
|
||||
continue
|
||||
print("Copying %s..." % rel_path)
|
||||
rel_path = os.path.relpath(path, root)
|
||||
if rel_path.startswith(".") or os.path.isdir(path) or os.path.islink(path):
|
||||
continue
|
||||
if verbose:
|
||||
print("Copying %s..." % rel_path)
|
||||
else:
|
||||
print(".", end="")
|
||||
|
||||
target = os.path.join(storage, rel_path)
|
||||
target_dir = os.path.dirname(target)
|
||||
|
@ -35,9 +51,10 @@ def sync(storage, patterns):
|
|||
os.makedirs(target_dir)
|
||||
shutil.copy2(path, target)
|
||||
|
||||
else:
|
||||
if verbose:
|
||||
print("Files copied successfully")
|
||||
|
||||
else:
|
||||
print(" DONE")
|
||||
|
||||
def set_boot_app(storage, app_to_boot):
|
||||
path = os.path.join(storage, 'once.txt')
|
||||
|
@ -47,7 +64,8 @@ def set_boot_app(storage, app_to_boot):
|
|||
pass
|
||||
with open(path, 'w') as f:
|
||||
f.write(app_to_boot + "\n")
|
||||
print("setting next boot to %s" % app_to_boot)
|
||||
if app_to_boot:
|
||||
print("setting next boot to %s" % app_to_boot)
|
||||
|
||||
def get_root():
|
||||
root = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..'))
|
||||
|
|
|
@ -16,7 +16,7 @@ $ tilda_tools sync
|
|||
|
||||
Update files in folder(s) to match current local version
|
||||
$ tilda_tools sync my_game shared
|
||||
$ tilda_tools sync <folder1> <folder2> ...
|
||||
$ tilda_tools sync <pattern1> <pattern2> ...
|
||||
|
||||
Sync (as above), but execute my_app after reboot
|
||||
$ tilda_toold.py sync --boot my_app [<other sync parameter>]
|
||||
|
@ -36,6 +36,9 @@ $ tilda_tools test
|
|||
Update firmware on badge (warning, this will delete all settings etc. stored on the badge!)
|
||||
$ tilda_tools firmware-update
|
||||
|
||||
Setup wifi.json to be copied to the badge on every sync
|
||||
$ tilda_tools wifi
|
||||
|
||||
Common parameters
|
||||
-----------------
|
||||
|
||||
|
@ -45,16 +48,18 @@ Common parameters
|
|||
"""
|
||||
|
||||
import sys, glob
|
||||
import sync, pyboard_util
|
||||
import sync, pyboard_util, wifi, pydfu_util
|
||||
from resources import *
|
||||
|
||||
def main():
|
||||
import argparse
|
||||
cmd_parser = argparse.ArgumentParser(description='Toolchain for working with the TiLDA Mk4')
|
||||
cmd_parser.add_argument('command', nargs=1, help='command [test|reset|sync|run]')
|
||||
cmd_parser.add_argument('command', nargs=1, help='command [test|reset|sync|run|validate|wifi|firmware-update]', choices=['test', 'reset', 'sync', 'validate', 'run', 'wifi', 'firmware-update'])
|
||||
cmd_parser.add_argument('-d', '--device', help='the serial device of the badge')
|
||||
cmd_parser.add_argument('-s', '--storage', help='the usb mass storage path of the badge')
|
||||
cmd_parser.add_argument('-b', '--baudrate', default=115200, help='the baud rate of the serial device')
|
||||
cmd_parser.add_argument('-v', '--verbose', action='store_true', help='adds more output')
|
||||
cmd_parser.add_argument('--print_resources', action='store_true', help='prints resources in json')
|
||||
cmd_parser.add_argument('--boot', help='defines which app to boot into after reboot')
|
||||
cmd_parser.add_argument('--run', help='like run, but after a sync')
|
||||
cmd_parser.add_argument('-w', '--wait', default=0, type=int, help='seconds to wait for USB connected board to become available')
|
||||
|
@ -63,11 +68,20 @@ def main():
|
|||
command = args.command[0]
|
||||
path = sync.get_root()
|
||||
|
||||
if command in ["test", "validate"]:
|
||||
if command == "firmware-update":
|
||||
pydfu_util.firmware_update(args.verbose)
|
||||
|
||||
if command == "wifi":
|
||||
wifi.select_wifi()
|
||||
|
||||
if command in ["test", "validate", "sync"]:
|
||||
resources = get_resources(path)
|
||||
add_metadata(path, resources)
|
||||
resolve_dependencies(resources)
|
||||
validate(path, resources)
|
||||
resolve_dependencies(resources)
|
||||
remove_upip(resources)
|
||||
if args.print_resources:
|
||||
print(pretty_print_resources(resources))
|
||||
errors = get_error_summary(resources)
|
||||
if errors:
|
||||
print("Problems found:\n")
|
||||
|
@ -76,15 +90,20 @@ def main():
|
|||
print("Local Test: PASS")
|
||||
if command == "test":
|
||||
command = "sync"
|
||||
args.path = []
|
||||
args.run = "test/main.py"
|
||||
if len(args.paths) == 0:
|
||||
args.run = "test/main.py"
|
||||
else:
|
||||
if "." not in args.paths[0]:
|
||||
args.paths[0] = "lib/%s.py" % args.paths[0]
|
||||
args.run = args.paths[0]
|
||||
|
||||
|
||||
if command in ["reset", "sync"]:
|
||||
pyboard_util.stop_badge(args)
|
||||
pyboard_util.stop_badge(args, args.verbose)
|
||||
|
||||
if command == "sync":
|
||||
paths = args.paths if len(args.paths) else None
|
||||
sync.sync(get_storage(args), paths)
|
||||
sync.sync(get_storage(args), paths, resources, args.verbose)
|
||||
|
||||
if command in ["reset", "sync"]:
|
||||
sync.set_boot_app(get_storage(args), args.boot or "")
|
||||
|
@ -94,6 +113,7 @@ def main():
|
|||
args.paths = [args.run]
|
||||
|
||||
if command == "run":
|
||||
pyboard_util.check_run(args)
|
||||
pyboard_util.run(args)
|
||||
|
||||
|
||||
|
@ -101,7 +121,7 @@ def main():
|
|||
|
||||
def find_storage():
|
||||
# todo: find solution for windows and linux
|
||||
for pattern in ['/Volumes/PYBFLASH']:
|
||||
for pattern in ['/Volumes/PYBFLASH', '/Volumes/NO NAME']:
|
||||
for path in glob.glob(pattern):
|
||||
return path
|
||||
print("Couldn't find badge storage - Please make it's plugged in and reset it if necessary")
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
TARGET=$(dirname `pwd`)"/upip"
|
||||
rm -rf "/tmp/upip.zip"
|
||||
curl -L "https://github.com/micropython/micropython-lib/archive/master.zip" -o "/tmp/upip.zip"
|
||||
rm -rf "/tmp/upip"
|
||||
unzip -q -a "/tmp/upip.zip" -d "/tmp/upip"
|
||||
cd "/tmp/upip/micropython-lib-master"
|
||||
rm -rf "$TARGET/*"
|
||||
for d in `find . -maxdepth 1 -type d ! -name ".*"`; do
|
||||
echo $d;
|
||||
find "$d" -maxdepth 1 -mindepth 1 \( -name '*.py' -not -name 'test_*' -not -name 'example_*' -not -name 'setup.py' -size +10c \) -or \( -type d -not -name 'dist' -not -name '*.egg-info' -not -name '__pycache__' \) | xargs -I{} bash -c -- 'ditto {} "'"$TARGET"'/"`echo "{}" | sed -e "s/\.\/[^\/]*\///"`';
|
||||
done
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
import os, sync, json
|
||||
|
||||
def select_wifi():
|
||||
ssid = input('Enter wifi name (SSID): ')
|
||||
pw = input('Enter wifi password, leave empty for open network: ')
|
||||
with open(os.path.join(sync.get_root(), "wifi.json"), "wt") as file:
|
||||
if pw:
|
||||
conn_details = {"ssid": ssid, "pw": pw}
|
||||
else:
|
||||
conn_details = {"ssid": ssid}
|
||||
|
||||
file.write(json.dumps(conn_details))
|
||||
print("wifi.json created - It will be transfered to the badge on the next sync")
|
|
@ -1,2 +1,3 @@
|
|||
.DS_Store
|
||||
__pycache__
|
||||
wifi.json
|
||||
|
|
4
boot.py
4
boot.py
|
@ -1,7 +1,9 @@
|
|||
import pyb, os, micropython
|
||||
import pyb, os, micropython, sys
|
||||
|
||||
micropython.alloc_emergency_exception_buf(100)
|
||||
|
||||
sys.path.append('/flash/upip')
|
||||
|
||||
os.sync()
|
||||
root = os.listdir()
|
||||
|
||||
|
|
261
lib/http.py
261
lib/http.py
|
@ -1,3 +1,262 @@
|
|||
"""HTTP library specially tied to TiLDAs functionality"""
|
||||
"""HTTP library specially tied to TiLDAs functionality
|
||||
|
||||
Somewhat inspired by "request".
|
||||
|
||||
Current known issues:
|
||||
* HTTPS is not supported
|
||||
*
|
||||
"""
|
||||
|
||||
___license___ = "MIT"
|
||||
___dependencies___ = ["urlencode"]
|
||||
|
||||
import usocket, ujson, os, time, gc, wifi
|
||||
from urlencode import urlencode
|
||||
|
||||
"""Usage
|
||||
from http_client import *
|
||||
print(get("http://example.com").raise_for_status().content)
|
||||
post("http://mydomain.co.uk/api/post", data="SOMETHING").raise_for_status().close() # If response is not consumed you need to close manually
|
||||
# Or, if you prefer the with syntax:
|
||||
with post("http://mydomain.co.uk/api/post", urlencoded="SOMETHING") as response:
|
||||
response.raise_for_error() # No manual close needed
|
||||
"""
|
||||
|
||||
SUPPORT_TIMEOUT = hasattr(usocket.socket, 'settimeout')
|
||||
CONTENT_TYPE_JSON = 'application/json'
|
||||
BUFFER_SIZE = 1024
|
||||
|
||||
class Response(object):
|
||||
def __init__(self):
|
||||
self.encoding = 'utf-8'
|
||||
self.headers = {}
|
||||
self.status = None
|
||||
self.socket = None
|
||||
self._content = None
|
||||
|
||||
# Hands the responsibility for a socket over to this reponse. This needs to happen
|
||||
# before any content can be inspected
|
||||
def add_socket(self, socket, content_so_far):
|
||||
self.content_so_far = content_so_far
|
||||
self.socket = socket
|
||||
|
||||
@property
|
||||
def content(self, timeout=90):
|
||||
start_time = time.time()
|
||||
if not self._content:
|
||||
if not self.socket:
|
||||
raise OSError("Invalid response socket state. Has the content been downloaded instead?")
|
||||
try:
|
||||
if "Content-Length" in self.headers:
|
||||
content_length = int(self.headers["Content-Length"])
|
||||
elif "content-length" in self.headers:
|
||||
content_length = int(self.headers["content-length"])
|
||||
else:
|
||||
raise Exception("No Content-Length")
|
||||
self._content = self.content_so_far
|
||||
del self.content_so_far
|
||||
while len(self._content) < content_length:
|
||||
buf = self.socket.recv(BUFFER_SIZE)
|
||||
self._content += buf
|
||||
if (time.time() - start_time) > timeout:
|
||||
raise Exception("HTTP request timeout")
|
||||
|
||||
finally:
|
||||
self.close()
|
||||
return self._content;
|
||||
|
||||
@property
|
||||
def text(self):
|
||||
return str(self.content, self.encoding) if self.content else ''
|
||||
|
||||
# If you don't use the content of a Response at all you need to manually close it
|
||||
def close(self):
|
||||
if self.socket is not None:
|
||||
self.socket.close()
|
||||
self.socket = None
|
||||
|
||||
def json(self):
|
||||
return ujson.loads(self.text)
|
||||
|
||||
# Writes content into a file. This function will write while receiving, which avoids
|
||||
# having to load all content into memory
|
||||
def download_to(self, target, timeout=90):
|
||||
start_time = time.time()
|
||||
if not self.socket:
|
||||
raise OSError("Invalid response socket state. Has the content already been consumed?")
|
||||
try:
|
||||
if "Content-Length" in self.headers:
|
||||
remaining = int(self.headers["Content-Length"])
|
||||
elif "content-length" in self.headers:
|
||||
remaining = int(self.headers["content-length"])
|
||||
else:
|
||||
raise Exception("No Content-Length")
|
||||
|
||||
with open(target, 'wb') as f:
|
||||
f.write(self.content_so_far)
|
||||
remaining -= len(self.content_so_far)
|
||||
del self.content_so_far
|
||||
while remaining > 0:
|
||||
buf = self.socket.recv(BUFFER_SIZE)
|
||||
f.write(buf)
|
||||
remaining -= len(buf)
|
||||
|
||||
if (time.time() - start_time) > timeout:
|
||||
raise Exception("HTTP request timeout")
|
||||
|
||||
f.flush()
|
||||
os.sync()
|
||||
|
||||
finally:
|
||||
self.close()
|
||||
|
||||
def raise_for_status(self):
|
||||
if 400 <= self.status < 500:
|
||||
raise OSError('Client error: %s' % self.status)
|
||||
if 500 <= self.status < 600:
|
||||
raise OSError('Server error: %s' % self.status)
|
||||
return self
|
||||
|
||||
# In case you want to use "with"
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_value, traceback):
|
||||
self.close()
|
||||
|
||||
def open_http_socket(method, url, json=None, timeout=None, headers=None, data=None, params=None):
|
||||
# This will immediately return if we're already connected, otherwise
|
||||
# it'll attempt to connect or prompt for a new network. Proceeding
|
||||
# without an active network connection will cause the getaddrinfo to
|
||||
# fail.
|
||||
wifi.connect(
|
||||
wait=True,
|
||||
show_wait_message=False,
|
||||
prompt_on_fail=True,
|
||||
dialog_title='TiLDA Wifi'
|
||||
)
|
||||
|
||||
urlparts = url.split('/', 3)
|
||||
proto = urlparts[0]
|
||||
host = urlparts[2]
|
||||
urlpath = '' if len(urlparts) < 4 else urlparts[3]
|
||||
|
||||
if proto == 'http:':
|
||||
port = 80
|
||||
elif proto == 'https:':
|
||||
raise OSError("HTTPS is currently not supported")
|
||||
port = 443
|
||||
else:
|
||||
raise OSError('Unsupported protocol: %s' % proto[:-1])
|
||||
|
||||
if ':' in host:
|
||||
host, port = host.split(':')
|
||||
port = int(port)
|
||||
|
||||
if data is not None:
|
||||
if isinstance(data, str):
|
||||
content = data
|
||||
content_type = "text/plain; charset=UTF-8"
|
||||
else:
|
||||
content = urlencode(data)
|
||||
content_type = "application/x-www-form-urlencoded"
|
||||
elif json is not None:
|
||||
content = ujson.dumps(json)
|
||||
content_type = CONTENT_TYPE_JSON
|
||||
else:
|
||||
content = None
|
||||
|
||||
# ToDo: Handle IPv6 addresses
|
||||
if is_ipv4_address(host):
|
||||
addr = (host, port)
|
||||
else:
|
||||
ai = usocket.getaddrinfo(host, port)
|
||||
addr = ai[0][4]
|
||||
|
||||
sock = None
|
||||
if proto == 'https:':
|
||||
sock = usocket.socket(usocket.AF_INET, usocket.SOCK_STREAM, usocket.SEC_SOCKET)
|
||||
else:
|
||||
sock = usocket.socket()
|
||||
|
||||
if params:
|
||||
urlpath += "?" + urlencode(params)
|
||||
|
||||
sock.connect(addr)
|
||||
if proto == 'https:':
|
||||
sock.settimeout(0) # Actually make timeouts working properly with ssl
|
||||
|
||||
sock.send('%s /%s HTTP/1.0\r\nHost: %s\r\n' % (method, urlpath, host))
|
||||
|
||||
if headers is not None:
|
||||
for header in headers.items():
|
||||
sock.send('%s: %s\r\n' % header)
|
||||
|
||||
if content is not None:
|
||||
sock.send('content-length: %s\r\n' % len(content))
|
||||
sock.send('content-type: %s\r\n' % content_type)
|
||||
sock.send('\r\n')
|
||||
sock.send(content)
|
||||
else:
|
||||
sock.send('\r\n')
|
||||
|
||||
return sock
|
||||
|
||||
# Adapted from upip
|
||||
def request(method, url, json=None, timeout=None, headers=None, data=None, params=None):
|
||||
sock = open_http_socket(method, url, json, timeout, headers, data, params)
|
||||
try:
|
||||
response = Response()
|
||||
state = 1
|
||||
hbuf = b""
|
||||
while True:
|
||||
buf = sock.recv(BUFFER_SIZE)
|
||||
if state == 1: # Status
|
||||
nl = buf.find(b"\n")
|
||||
if nl > -1:
|
||||
hbuf += buf[:nl - 1]
|
||||
response.status = int(hbuf.split(b' ')[1])
|
||||
state = 2
|
||||
hbuf = b"";
|
||||
buf = buf[nl + 1:]
|
||||
else:
|
||||
hbuf += buf
|
||||
|
||||
if state == 2: # Headers
|
||||
hbuf += buf
|
||||
nl = hbuf.find(b"\n")
|
||||
while nl > -1:
|
||||
if nl < 2:
|
||||
buf = hbuf[2:]
|
||||
hbuf = None
|
||||
state = 3
|
||||
break
|
||||
|
||||
header = hbuf[:nl - 1].decode("utf8").split(':', 3)
|
||||
response.headers[header[0].strip()] = header[1].strip()
|
||||
hbuf = hbuf[nl + 1:]
|
||||
nl = hbuf.find(b"\n")
|
||||
|
||||
if state == 3: # Content
|
||||
response.add_socket(sock, buf)
|
||||
sock = None # It's not our responsibility to close the socket anymore
|
||||
return response
|
||||
finally:
|
||||
if sock: sock.close()
|
||||
gc.collect()
|
||||
|
||||
def get(url, **kwargs):
|
||||
return request('GET', url, **kwargs)
|
||||
|
||||
def post(url, **kwargs):
|
||||
return request('POST', url, **kwargs)
|
||||
|
||||
def is_ipv4_address(address):
|
||||
octets = address.split('.')
|
||||
try:
|
||||
valid_octets = [x for x in octets if 0 <= int(x) and int(x) <= 255]
|
||||
return len(valid_octets) == 4
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
|
|
|
@ -1,11 +1,4 @@
|
|||
"""This app's purpose is to run a series of tests against library code
|
||||
|
||||
Once successful it displays and prints 'ok' on the screen.
|
||||
|
||||
Please make sure that all tests pass before sending a PR. You can easily
|
||||
do this by running "tilda_tools test". Thank you for keeping all the
|
||||
tests green! *face-throwing-a-kiss-emoji*
|
||||
"""
|
||||
"""Tests for database"""
|
||||
|
||||
___license___ = "MIT"
|
||||
___dependencies___ = ["unittest", "database"]
|
||||
|
|
|
@ -1,21 +1,47 @@
|
|||
"""This app's purpose is to run a series of tests against library code
|
||||
|
||||
Once successful it displays and prints 'ok' on the screen.
|
||||
|
||||
Please make sure that all tests pass before sending a PR. You can easily
|
||||
do this by running "tilda_tools test". Thank you for keeping all the
|
||||
tests green! *face-throwing-a-kiss-emoji*
|
||||
"""
|
||||
"""Tests for http"""
|
||||
|
||||
___license___ = "MIT"
|
||||
___dependencies___ = ["unittest"]
|
||||
___dependencies___ = ["unittest", "http", "wifi"]
|
||||
|
||||
import unittest
|
||||
from http import *
|
||||
import wifi
|
||||
|
||||
class TestHttp(unittest.TestCase):
|
||||
|
||||
def test_foo(self):
|
||||
pass
|
||||
def setUpClass(self):
|
||||
wifi.connect()
|
||||
|
||||
def test_get_with_https(self):
|
||||
with self.assertRaises(OSError) as context:
|
||||
get("https://httpbin.org/get")
|
||||
self.assertIn("HTTPS is currently not supported", str(context.exception))
|
||||
|
||||
def test_get(self):
|
||||
with get("http://httpbin.org/get", params={"foo": "bar"}, headers={"accept": "application/json"}) as response:
|
||||
self.assertEqual(response.headers["Content-Type"], "application/json")
|
||||
self.assertEqual(response.status, 200)
|
||||
content = response.json()
|
||||
self.assertEqual(content["headers"]["Accept"], "application/json")
|
||||
self.assertEqual(content["args"], {"foo":"bar"})
|
||||
|
||||
def test_post_form(self):
|
||||
with post("http://httpbin.org/post", data={"foo": "bar"}).raise_for_status() as response:
|
||||
content = response.json()
|
||||
self.assertEqual(content["headers"]["Content-Type"], "application/x-www-form-urlencoded")
|
||||
self.assertEqual(content["form"], {"foo":"bar"})
|
||||
|
||||
def test_post_string(self):
|
||||
with post("http://httpbin.org/post", data="foobar").raise_for_status() as response:
|
||||
content = response.json()
|
||||
self.assertEqual(content["headers"]["Content-Type"], "text/plain; charset=UTF-8")
|
||||
self.assertEqual(content["data"], "foobar")
|
||||
|
||||
def test_post_json(self):
|
||||
with post("http://httpbin.org/post", json={"foo":"bar"}).raise_for_status() as response:
|
||||
content = response.json()
|
||||
self.assertEqual(content["headers"]["Content-Type"], "application/json")
|
||||
self.assertEqual(content["json"], {"foo":"bar"})
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
"""Tests for urlencode"""
|
||||
|
||||
___license___ = "MIT"
|
||||
___dependencies___ = ["unittest", "urlencode"]
|
||||
|
||||
import unittest
|
||||
from urlencode import *
|
||||
|
||||
class TestUrlencode(unittest.TestCase):
|
||||
|
||||
def test_urlencode(self):
|
||||
self.assertEqual(
|
||||
urlencode({"täst":"!£$%(*&^%()", "l": "😃"}),
|
||||
"l=%F0%9F%98%83&t%C3%A4st=%21%C2%A3%24%25%28%2A%26%5E%25%28%29"
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
TestUrlencode().run_standalone()
|
117
lib/unittest.py
117
lib/unittest.py
|
@ -1,8 +1,12 @@
|
|||
"""Base libarary for test cases"""
|
||||
"""Base libarary for test cases
|
||||
|
||||
See https://github.com/python/cpython/blob/master/Lib/unittest/case.py for
|
||||
some of the code copied here
|
||||
"""
|
||||
|
||||
___license___ = "MIT"
|
||||
|
||||
import sys
|
||||
import sys, ugfx
|
||||
|
||||
class SkipTest(Exception):
|
||||
"""Indicates a test has been skipped"""
|
||||
|
@ -44,6 +48,7 @@ class TestCase(object):
|
|||
return self.count_fail == 0
|
||||
|
||||
def run_standalone(self):
|
||||
ugfx.clear(0xFFFFFF)
|
||||
self.run()
|
||||
print_result(self.count_pass, self.count_fail, self.count_skip)
|
||||
|
||||
|
@ -72,8 +77,116 @@ class TestCase(object):
|
|||
def assertFalse(self, actual):
|
||||
self.assertEqual(actual, False)
|
||||
|
||||
def assertRaises(self, expected_exception, *args, **kwargs):
|
||||
context = _AssertRaisesContext(expected_exception, self)
|
||||
return context.handle('assertRaises', args, kwargs)
|
||||
|
||||
def assertIn(self, sub, actual):
|
||||
if not sub in actual:
|
||||
raise FailTest("Expected %s to be in %s" % (sub, actual))
|
||||
|
||||
def skip(self):
|
||||
raise SkipTest()
|
||||
|
||||
def print_result(count_pass, count_fail, count_skip):
|
||||
print("PASS: %s FAIL: %s SKIP: %s" % (count_pass, count_fail, count_skip))
|
||||
|
||||
###########################################
|
||||
#### Bits copied straight from cpython ####
|
||||
###########################################
|
||||
|
||||
class _BaseTestCaseContext:
|
||||
|
||||
def __init__(self, test_case):
|
||||
self.test_case = test_case
|
||||
|
||||
def _raiseFailure(self, standardMsg):
|
||||
msg = self.test_case._formatMessage(self.msg, standardMsg)
|
||||
raise self.test_case.failureException(msg)
|
||||
|
||||
class _AssertRaisesBaseContext(_BaseTestCaseContext):
|
||||
|
||||
def __init__(self, expected, test_case, expected_regex=None):
|
||||
_BaseTestCaseContext.__init__(self, test_case)
|
||||
self.expected = expected
|
||||
self.test_case = test_case
|
||||
if expected_regex is not None:
|
||||
expected_regex = re.compile(expected_regex)
|
||||
self.expected_regex = expected_regex
|
||||
self.obj_name = None
|
||||
self.msg = None
|
||||
|
||||
def handle(self, name, args, kwargs):
|
||||
"""
|
||||
If args is empty, assertRaises/Warns is being used as a
|
||||
context manager, so check for a 'msg' kwarg and return self.
|
||||
If args is not empty, call a callable passing positional and keyword
|
||||
arguments.
|
||||
"""
|
||||
try:
|
||||
if not _is_subtype(self.expected, self._base_type):
|
||||
raise TypeError('%s() arg 1 must be %s' %
|
||||
(name, self._base_type_str))
|
||||
if args and args[0] is None:
|
||||
warnings.warn("callable is None",
|
||||
DeprecationWarning, 3)
|
||||
args = ()
|
||||
if not args:
|
||||
self.msg = kwargs.pop('msg', None)
|
||||
if kwargs:
|
||||
warnings.warn('%r is an invalid keyword argument for '
|
||||
'this function' % next(iter(kwargs)),
|
||||
DeprecationWarning, 3)
|
||||
return self
|
||||
|
||||
callable_obj, *args = args
|
||||
try:
|
||||
self.obj_name = callable_obj.__name__
|
||||
except AttributeError:
|
||||
self.obj_name = str(callable_obj)
|
||||
with self:
|
||||
callable_obj(*args, **kwargs)
|
||||
finally:
|
||||
# bpo-23890: manually break a reference cycle
|
||||
self = None
|
||||
|
||||
|
||||
|
||||
class _AssertRaisesContext(_AssertRaisesBaseContext):
|
||||
"""A context manager used to implement TestCase.assertRaises* methods."""
|
||||
|
||||
_base_type = BaseException
|
||||
_base_type_str = 'an exception type or tuple of exception types'
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_value, tb):
|
||||
if exc_type is None:
|
||||
try:
|
||||
exc_name = self.expected.__name__
|
||||
except AttributeError:
|
||||
exc_name = str(self.expected)
|
||||
if self.obj_name:
|
||||
self._raiseFailure("{} not raised by {}".format(exc_name,
|
||||
self.obj_name))
|
||||
else:
|
||||
self._raiseFailure("{} not raised".format(exc_name))
|
||||
if not issubclass(exc_type, self.expected):
|
||||
# let unexpected exceptions pass through
|
||||
return False
|
||||
# store exception
|
||||
self.exception = exc_value
|
||||
if self.expected_regex is None:
|
||||
return True
|
||||
|
||||
expected_regex = self.expected_regex
|
||||
if not expected_regex.search(str(exc_value)):
|
||||
self._raiseFailure('"{}" does not match "{}"'.format(
|
||||
expected_regex.pattern, str(exc_value)))
|
||||
return True
|
||||
|
||||
def _is_subtype(expected, basetype):
|
||||
if isinstance(expected, tuple):
|
||||
return all(_is_subtype(e, basetype) for e in expected)
|
||||
return isinstance(expected, type) and issubclass(expected, basetype)
|
||||
|
|
|
@ -0,0 +1,133 @@
|
|||
"""URL encoding helper
|
||||
|
||||
Mostly taken from urllib.parse (which is sadly too large to be imported directly)
|
||||
|
||||
I've removed most of the comment to make it easier on micropython
|
||||
"""
|
||||
___license___ = "Python"
|
||||
___dependencies___ = ["upip:collections"]
|
||||
|
||||
from collections.defaultdict import defaultdict
|
||||
|
||||
_ALWAYS_SAFE = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
||||
b'abcdefghijklmnopqrstuvwxyz'
|
||||
b'0123456789'
|
||||
b'_.-')
|
||||
_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE)
|
||||
|
||||
_safe_quoters = {}
|
||||
|
||||
class Quoter(defaultdict):
|
||||
def __init__(self, safe):
|
||||
"""safe: bytes object."""
|
||||
self.safe = _ALWAYS_SAFE.union(safe)
|
||||
|
||||
def __repr__(self):
|
||||
# Without this, will just display as a defaultdict
|
||||
return "<Quoter %r>" % dict(self)
|
||||
|
||||
def __missing__(self, b):
|
||||
# Handle a cache miss. Store quoted string in cache and return.
|
||||
res = chr(b) if b in self.safe else '%{:02X}'.format(b)
|
||||
self[b] = res
|
||||
return res
|
||||
|
||||
def quote(string, safe='/', encoding=None, errors=None):
|
||||
if isinstance(string, str):
|
||||
if not string:
|
||||
return string
|
||||
if encoding is None:
|
||||
encoding = 'utf-8'
|
||||
if errors is None:
|
||||
errors = 'strict'
|
||||
string = string.encode(encoding, errors)
|
||||
else:
|
||||
if encoding is not None:
|
||||
raise TypeError("quote() doesn't support 'encoding' for bytes")
|
||||
if errors is not None:
|
||||
raise TypeError("quote() doesn't support 'errors' for bytes")
|
||||
return quote_from_bytes(string, safe)
|
||||
|
||||
def quote_plus(string, safe='', encoding=None, errors=None):
|
||||
if ((isinstance(string, str) and ' ' not in string) or
|
||||
(isinstance(string, bytes) and b' ' not in string)):
|
||||
return quote(string, safe, encoding, errors)
|
||||
if isinstance(safe, str):
|
||||
space = ' '
|
||||
else:
|
||||
space = b' '
|
||||
string = quote(string, safe + space, encoding, errors)
|
||||
return string.replace(' ', '+')
|
||||
|
||||
def quote_from_bytes(bs, safe='/'):
|
||||
if not isinstance(bs, (bytes, bytearray)):
|
||||
raise TypeError("quote_from_bytes() expected bytes")
|
||||
if not bs:
|
||||
return ''
|
||||
if isinstance(safe, str):
|
||||
safe = safe.encode('ascii', 'ignore')
|
||||
else:
|
||||
safe = bytes([c for c in safe if c < 128])
|
||||
if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
|
||||
return bs.decode()
|
||||
try:
|
||||
quoter = _safe_quoters[safe]
|
||||
except KeyError:
|
||||
_safe_quoters[safe] = quoter = Quoter(safe).__getitem__
|
||||
return ''.join([quoter(char) for char in bs])
|
||||
|
||||
def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
|
||||
if hasattr(query, "items"):
|
||||
query = query.items()
|
||||
else:
|
||||
try:
|
||||
if len(query) and not isinstance(query[0], tuple):
|
||||
raise TypeError
|
||||
except TypeError:
|
||||
raise TypeError("not a valid non-string sequence "
|
||||
"or mapping object")#.with_traceback(tb)
|
||||
|
||||
l = []
|
||||
if not doseq:
|
||||
for k, v in query:
|
||||
if isinstance(k, bytes):
|
||||
k = quote_plus(k, safe)
|
||||
else:
|
||||
k = quote_plus(str(k), safe, encoding, errors)
|
||||
|
||||
if isinstance(v, bytes):
|
||||
v = quote_plus(v, safe)
|
||||
else:
|
||||
v = quote_plus(str(v), safe, encoding, errors)
|
||||
l.append(k + '=' + v)
|
||||
else:
|
||||
for k, v in query:
|
||||
if isinstance(k, bytes):
|
||||
k = quote_plus(k, safe)
|
||||
else:
|
||||
k = quote_plus(str(k), safe, encoding, errors)
|
||||
|
||||
if isinstance(v, bytes):
|
||||
v = quote_plus(v, safe)
|
||||
l.append(k + '=' + v)
|
||||
elif isinstance(v, str):
|
||||
v = quote_plus(v, safe, encoding, errors)
|
||||
l.append(k + '=' + v)
|
||||
else:
|
||||
try:
|
||||
# Is this a sufficient test for sequence-ness?
|
||||
x = len(v)
|
||||
except TypeError:
|
||||
# not a sequence
|
||||
v = quote_plus(str(v), safe, encoding, errors)
|
||||
l.append(k + '=' + v)
|
||||
else:
|
||||
# loop over the sequence
|
||||
for elt in v:
|
||||
if isinstance(elt, bytes):
|
||||
elt = quote_plus(elt, safe)
|
||||
else:
|
||||
elt = quote_plus(str(elt), safe, encoding, errors)
|
||||
l.append(k + '=' + elt)
|
||||
return '&'.join(l)
|
||||
|
|
@ -10,11 +10,12 @@ tests green! *face-throwing-a-kiss-emoji*
|
|||
___license___ = "MIT"
|
||||
___categories___ = ["Development"]
|
||||
___name___ = "Integration test app"
|
||||
___dependencies___ = ["unittest", "test_database", "test_http"]
|
||||
___dependencies___ = ["unittest", "test_database", "test_http", "test_urlencode"]
|
||||
|
||||
# Add all tests that need to be run here:
|
||||
import test_database
|
||||
import test_http
|
||||
import test_urlencode
|
||||
|
||||
# run
|
||||
import sys, unittest
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
nested_scopes = True
|
||||
generators = True
|
||||
division = True
|
||||
absolute_import = True
|
||||
with_statement = True
|
||||
print_function = True
|
||||
unicode_literals = True
|
|
@ -0,0 +1,34 @@
|
|||
import ffi
|
||||
import sys
|
||||
|
||||
|
||||
_h = None
|
||||
|
||||
names = ('libc.so', 'libc.so.0', 'libc.so.6', 'libc.dylib')
|
||||
|
||||
def get():
|
||||
global _h
|
||||
if _h:
|
||||
return _h
|
||||
err = None
|
||||
for n in names:
|
||||
try:
|
||||
_h = ffi.open(n)
|
||||
return _h
|
||||
except OSError as e:
|
||||
err = e
|
||||
raise err
|
||||
|
||||
|
||||
def set_names(n):
|
||||
global names
|
||||
names = n
|
||||
|
||||
# Find out bitness of the platform, even if long ints are not supported
|
||||
# TODO: All bitness differences should be removed from micropython-lib, and
|
||||
# this snippet too.
|
||||
bitness = 1
|
||||
v = sys.maxsize
|
||||
while v:
|
||||
bitness += 1
|
||||
v >>= 1
|
|
@ -0,0 +1,395 @@
|
|||
"""Shared support for scanning document type declarations in HTML and XHTML.
|
||||
|
||||
This module is used as a foundation for the html.parser module. It has no
|
||||
documented public API and should not be used directly.
|
||||
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match
|
||||
_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match
|
||||
_commentclose = re.compile(r'--\s*>')
|
||||
_markedsectionclose = re.compile(r']\s*]\s*>')
|
||||
|
||||
# An analysis of the MS-Word extensions is available at
|
||||
# http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf
|
||||
|
||||
_msmarkedsectionclose = re.compile(r']\s*>')
|
||||
|
||||
del re
|
||||
|
||||
|
||||
class ParserBase:
|
||||
"""Parser base class which provides some common support methods used
|
||||
by the SGML/HTML and XHTML parsers."""
|
||||
|
||||
def __init__(self):
|
||||
if self.__class__ is ParserBase:
|
||||
raise RuntimeError(
|
||||
"_markupbase.ParserBase must be subclassed")
|
||||
|
||||
def error(self, message):
|
||||
raise NotImplementedError(
|
||||
"subclasses of ParserBase must override error()")
|
||||
|
||||
def reset(self):
|
||||
self.lineno = 1
|
||||
self.offset = 0
|
||||
|
||||
def getpos(self):
|
||||
"""Return current line number and offset."""
|
||||
return self.lineno, self.offset
|
||||
|
||||
# Internal -- update line number and offset. This should be
|
||||
# called for each piece of data exactly once, in order -- in other
|
||||
# words the concatenation of all the input strings to this
|
||||
# function should be exactly the entire input.
|
||||
def updatepos(self, i, j):
|
||||
if i >= j:
|
||||
return j
|
||||
rawdata = self.rawdata
|
||||
nlines = rawdata.count("\n", i, j)
|
||||
if nlines:
|
||||
self.lineno = self.lineno + nlines
|
||||
pos = rawdata.rindex("\n", i, j) # Should not fail
|
||||
self.offset = j-(pos+1)
|
||||
else:
|
||||
self.offset = self.offset + j-i
|
||||
return j
|
||||
|
||||
_decl_otherchars = ''
|
||||
|
||||
# Internal -- parse declaration (for use by subclasses).
|
||||
def parse_declaration(self, i):
|
||||
# This is some sort of declaration; in "HTML as
|
||||
# deployed," this should only be the document type
|
||||
# declaration ("<!DOCTYPE html...>").
|
||||
# ISO 8879:1986, however, has more complex
|
||||
# declaration syntax for elements in <!...>, including:
|
||||
# --comment--
|
||||
# [marked section]
|
||||
# name in the following list: ENTITY, DOCTYPE, ELEMENT,
|
||||
# ATTLIST, NOTATION, SHORTREF, USEMAP,
|
||||
# LINKTYPE, LINK, IDLINK, USELINK, SYSTEM
|
||||
rawdata = self.rawdata
|
||||
j = i + 2
|
||||
assert rawdata[i:j] == "<!", "unexpected call to parse_declaration"
|
||||
if rawdata[j:j+1] == ">":
|
||||
# the empty comment <!>
|
||||
return j + 1
|
||||
if rawdata[j:j+1] in ("-", ""):
|
||||
# Start of comment followed by buffer boundary,
|
||||
# or just a buffer boundary.
|
||||
return -1
|
||||
# A simple, practical version could look like: ((name|stringlit) S*) + '>'
|
||||
n = len(rawdata)
|
||||
if rawdata[j:j+2] == '--': #comment
|
||||
# Locate --.*-- as the body of the comment
|
||||
return self.parse_comment(i)
|
||||
elif rawdata[j] == '[': #marked section
|
||||
# Locate [statusWord [...arbitrary SGML...]] as the body of the marked section
|
||||
# Where statusWord is one of TEMP, CDATA, IGNORE, INCLUDE, RCDATA
|
||||
# Note that this is extended by Microsoft Office "Save as Web" function
|
||||
# to include [if...] and [endif].
|
||||
return self.parse_marked_section(i)
|
||||
else: #all other declaration elements
|
||||
decltype, j = self._scan_name(j, i)
|
||||
if j < 0:
|
||||
return j
|
||||
if decltype == "doctype":
|
||||
self._decl_otherchars = ''
|
||||
while j < n:
|
||||
c = rawdata[j]
|
||||
if c == ">":
|
||||
# end of declaration syntax
|
||||
data = rawdata[i+2:j]
|
||||
if decltype == "doctype":
|
||||
self.handle_decl(data)
|
||||
else:
|
||||
# According to the HTML5 specs sections "8.2.4.44 Bogus
|
||||
# comment state" and "8.2.4.45 Markup declaration open
|
||||
# state", a comment token should be emitted.
|
||||
# Calling unknown_decl provides more flexibility though.
|
||||
self.unknown_decl(data)
|
||||
return j + 1
|
||||
if c in "\"'":
|
||||
m = _declstringlit_match(rawdata, j)
|
||||
if not m:
|
||||
return -1 # incomplete
|
||||
j = m.end()
|
||||
elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
|
||||
name, j = self._scan_name(j, i)
|
||||
elif c in self._decl_otherchars:
|
||||
j = j + 1
|
||||
elif c == "[":
|
||||
# this could be handled in a separate doctype parser
|
||||
if decltype == "doctype":
|
||||
j = self._parse_doctype_subset(j + 1, i)
|
||||
elif decltype in {"attlist", "linktype", "link", "element"}:
|
||||
# must tolerate []'d groups in a content model in an element declaration
|
||||
# also in data attribute specifications of attlist declaration
|
||||
# also link type declaration subsets in linktype declarations
|
||||
# also link attribute specification lists in link declarations
|
||||
self.error("unsupported '[' char in %s declaration" % decltype)
|
||||
else:
|
||||
self.error("unexpected '[' char in declaration")
|
||||
else:
|
||||
self.error(
|
||||
"unexpected %r char in declaration" % rawdata[j])
|
||||
if j < 0:
|
||||
return j
|
||||
return -1 # incomplete
|
||||
|
||||
# Internal -- parse a marked section
# Override this to handle MS-word extension syntax <![if word]>content<![endif]>
def parse_marked_section(self, i, report=1):
    """Parse a marked section (<![KEYWORD[...]]>) starting at *i*.

    Returns the index just past the section, or -1 if the section is
    not yet complete in the buffer. When *report* is true, the section
    contents are passed to unknown_decl().
    """
    rawdata= self.rawdata
    assert rawdata[i:i+3] == '<![', "unexpected call to parse_marked_section()"
    sectName, j = self._scan_name( i+3, i )
    if j < 0:
        # Name incomplete at end of buffer; need more data.
        return j
    if sectName in {"temp", "cdata", "ignore", "include", "rcdata"}:
        # look for standard ]]> ending
        match= _markedsectionclose.search(rawdata, i+3)
    elif sectName in {"if", "else", "endif"}:
        # look for MS Office ]> ending
        match= _msmarkedsectionclose.search(rawdata, i+3)
    else:
        # error() is expected to raise; no match object exists here.
        self.error('unknown status keyword %r in marked section' % rawdata[i+3:j])
    if not match:
        return -1
    if report:
        j = match.start(0)
        self.unknown_decl(rawdata[i+3: j])
    return match.end(0)
|
||||
|
||||
# Internal -- parse comment, return length or -1 if not terminated
def parse_comment(self, i, report=1):
    """Parse a comment (<!--...-->) starting at *i*.

    Returns the index just past the comment, or -1 when the closing
    "-->" has not arrived in the buffer yet. When *report* is true the
    comment text is passed to handle_comment().
    """
    buf = self.rawdata
    if buf[i:i+4] != '<!--':
        self.error('unexpected call to parse_comment()')
    m = _commentclose.search(buf, i+4)
    if m is None:
        # Comment is not terminated yet; wait for more data.
        return -1
    if report:
        self.handle_comment(buf[i+4: m.start(0)])
    return m.end(0)
|
||||
|
||||
# Internal -- scan past the internal subset in a <!DOCTYPE declaration,
# returning the index just past any whitespace following the trailing ']'.
def _parse_doctype_subset(self, i, declstartpos):
    """Scan the internal subset ([...]) of a <!DOCTYPE declaration.

    Returns the index of the '>' that follows the closing ']', or -1
    when more data is needed. Calls error() (expected to raise) on
    malformed input.
    """
    rawdata = self.rawdata
    n = len(rawdata)
    j = i
    while j < n:
        c = rawdata[j]
        if c == "<":
            s = rawdata[j:j+2]
            if s == "<":
                # end of buffer; incomplete
                return -1
            if s != "<!":
                self.updatepos(declstartpos, j + 1)
                self.error("unexpected char in internal subset (in %r)" % s)
            if (j + 2) == n:
                # end of buffer; incomplete
                return -1
            if (j + 4) > n:
                # end of buffer; incomplete
                return -1
            if rawdata[j:j+4] == "<!--":
                # Comments inside the subset are skipped, not reported.
                j = self.parse_comment(j, report=0)
                if j < 0:
                    return j
                continue
            name, j = self._scan_name(j + 2, declstartpos)
            if j == -1:
                return -1
            if name not in {"attlist", "element", "entity", "notation"}:
                self.updatepos(declstartpos, j + 2)
                self.error(
                    "unknown declaration %r in internal subset" % name)
            # handle the individual names by dispatching to
            # _parse_doctype_<name>
            meth = getattr(self, "_parse_doctype_" + name)
            j = meth(j, declstartpos)
            if j < 0:
                return j
        elif c == "%":
            # parameter entity reference
            if (j + 1) == n:
                # end of buffer; incomplete
                return -1
            s, j = self._scan_name(j + 1, declstartpos)
            if j < 0:
                return j
            if rawdata[j] == ";":
                j = j + 1
        elif c == "]":
            # End of subset: skip trailing whitespace, then expect '>'.
            j = j + 1
            while j < n and rawdata[j].isspace():
                j = j + 1
            if j < n:
                if rawdata[j] == ">":
                    return j
                self.updatepos(declstartpos, j)
                self.error("unexpected char after internal subset")
            else:
                return -1
        elif c.isspace():
            j = j + 1
        else:
            self.updatepos(declstartpos, j)
            self.error("unexpected char %r in internal subset" % c)
    # end of buffer reached
    return -1
|
||||
|
||||
# Internal -- scan past <!ELEMENT declarations
def _parse_doctype_element(self, i, declstartpos):
    """Skip past an <!ELEMENT declaration starting at *i*.

    Returns the index just past the terminating '>' or -1 when more
    data is needed. The content model itself is not parsed.
    """
    name, pos = self._scan_name(i, declstartpos)
    if pos == -1:
        return -1
    # Content model is ignored; just locate the closing '>'.
    end = self.rawdata.find(">", pos)
    return -1 if end < 0 else end + 1
|
||||
|
||||
# Internal -- scan past <!ATTLIST declarations
def _parse_doctype_attlist(self, i, declstartpos):
    """Skip past an <!ATTLIST declaration starting at *i*.

    Returns the index just past the terminating '>' or -1 when the
    declaration is incomplete in the buffer.
    """
    rawdata = self.rawdata
    name, j = self._scan_name(i, declstartpos)
    c = rawdata[j:j+1]
    if c == "":
        return -1
    if c == ">":
        return j + 1
    while 1:
        # scan a series of attribute descriptions; simplified:
        # name type [value] [#constraint]
        name, j = self._scan_name(j, declstartpos)
        if j < 0:
            return j
        c = rawdata[j:j+1]
        if c == "":
            return -1
        if c == "(":
            # an enumerated type; look for ')'
            if ")" in rawdata[j:]:
                j = rawdata.find(")", j) + 1
            else:
                return -1
            while rawdata[j:j+1].isspace():
                j = j + 1
            if not rawdata[j:]:
                # end of buffer, incomplete
                return -1
        else:
            # non-enumerated type keyword (CDATA, ID, ...)
            name, j = self._scan_name(j, declstartpos)
        c = rawdata[j:j+1]
        if not c:
            return -1
        if c in "'\"":
            # quoted default value
            m = _declstringlit_match(rawdata, j)
            if m:
                j = m.end()
            else:
                return -1
            c = rawdata[j:j+1]
            if not c:
                return -1
        if c == "#":
            # constraint keyword such as #REQUIRED / #IMPLIED
            if rawdata[j:] == "#":
                # end of buffer
                return -1
            name, j = self._scan_name(j + 1, declstartpos)
            if j < 0:
                return j
            c = rawdata[j:j+1]
            if not c:
                return -1
        if c == '>':
            # all done
            return j + 1
|
||||
|
||||
# Internal -- scan past <!NOTATION declarations
def _parse_doctype_notation(self, i, declstartpos):
    """Skip past a <!NOTATION declaration starting at *i*.

    Returns the index just past the terminating '>' or -1 when more
    data is needed.
    """
    name, pos = self._scan_name(i, declstartpos)
    if pos < 0:
        return pos
    buf = self.rawdata
    while True:
        ch = buf[pos:pos+1]
        if not ch:
            # end of buffer; incomplete
            return -1
        if ch == '>':
            return pos + 1
        if ch in "'\"":
            # quoted system/public identifier
            lit = _declstringlit_match(buf, pos)
            if not lit:
                return -1
            pos = lit.end()
        else:
            name, pos = self._scan_name(pos, declstartpos)
            if pos < 0:
                return pos
|
||||
|
||||
# Internal -- scan past <!ENTITY declarations
def _parse_doctype_entity(self, i, declstartpos):
    """Skip past an <!ENTITY declaration starting at *i*.

    Handles both general and parameter ("%") entities. Returns the
    index just past the terminating '>' or -1 when more data is needed.
    """
    rawdata = self.rawdata
    if rawdata[i:i+1] == "%":
        # parameter entity: skip whitespace between '%' and the name
        j = i + 1
        while 1:
            c = rawdata[j:j+1]
            if not c:
                return -1
            if c.isspace():
                j = j + 1
            else:
                break
    else:
        j = i
    name, j = self._scan_name(j, declstartpos)
    if j < 0:
        return j
    while 1:
        c = self.rawdata[j:j+1]
        if not c:
            return -1
        if c in "'\"":
            # quoted entity value / identifier
            m = _declstringlit_match(rawdata, j)
            if m:
                j = m.end()
            else:
                return -1 # incomplete
        elif c == ">":
            return j + 1
        else:
            name, j = self._scan_name(j, declstartpos)
            if j < 0:
                return j
|
||||
|
||||
# Internal -- scan a name token and the new position and the token, or
# return -1 if we've reached the end of the buffer.
def _scan_name(self, i, declstartpos):
    """Scan a name token at *i*.

    Returns (lowercased name, end index), or (None, -1) when the buffer
    ends before the token is known to be complete. Calls error()
    (expected to raise) when no name token is present.
    """
    buf = self.rawdata
    end = len(buf)
    if i == end:
        return None, -1
    m = _declname_match(buf, i)
    if m:
        token = m.group()
        if i + len(token) == end:
            # Name might continue past the buffer; ask for more data.
            return None, -1
        return token.strip().lower(), m.end()
    self.updatepos(declstartpos, i)
    self.error("expected name token at %r"
               % buf[declstartpos:declstartpos+20])
|
||||
|
||||
# To be overridden -- handlers for unknown objects
def unknown_decl(self, data):
    """Handle a declaration no other handler recognized.

    Default implementation ignores *data*; subclasses may override.
    """
    pass
|
|
@ -0,0 +1,2 @@
|
|||
def abstractmethod(f):
    """No-op stand-in for ``abc.abstractmethod``: return *f* unchanged."""
    return f
|
|
@ -0,0 +1,216 @@
|
|||
"""
|
||||
Minimal and functional version of CPython's argparse module.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from ucollections import namedtuple
|
||||
|
||||
|
||||
class _ArgError(BaseException):
    # Internal: raised on any parse failure and caught by
    # ArgumentParser._parse_args_impl, which prints usage and exits.
    # NOTE(review): derives from BaseException, presumably so that user
    # "except Exception" handlers cannot swallow it -- confirm intent.
    pass
|
||||
|
||||
|
||||
class _Arg:
    """Internal description of one argument (optional or positional)."""

    def __init__(self, names, dest, action, nargs, const, default, help):
        self.names = names      # option strings, or [positional name]
        self.dest = dest        # attribute name in the result namespace
        self.action = action    # "store" or "store_const"
        self.nargs = nargs      # None, "?", "*", "+", or an int-like value
        self.const = const      # value stored by store_const actions
        self.default = default  # value used when the argument is absent
        self.help = help        # help text shown by usage()

    def parse(self, optname, args):
        # parse args for this arg: consume value(s) from the front of
        # *args* (mutated in place) and return the stored value.
        if self.action == "store":
            if self.nargs is None:
                # exactly one required value
                if args:
                    return args.pop(0)
                else:
                    raise _ArgError("expecting value for %s" % optname)
            elif self.nargs == "?":
                # one optional value, falling back to the default
                if args:
                    return args.pop(0)
                else:
                    return self.default
            else:
                # list-valued: "*" (any), "+" (at least one), or a count
                if self.nargs == "*":
                    n = -1
                elif self.nargs == "+":
                    if not args:
                        raise _ArgError("expecting value for %s" % optname)
                    n = -1
                else:
                    n = int(self.nargs)
                ret = []
                stop_at_opt = True
                while args and n != 0:
                    if stop_at_opt and args[0].startswith("-") and args[0] != "-":
                        if args[0] == "--":
                            # lone "--" disables option detection from here on
                            stop_at_opt = False
                            args.pop(0)
                        else:
                            # next option terminates this value list
                            break
                    else:
                        ret.append(args.pop(0))
                        n -= 1
                if n > 0:
                    # a fixed count was requested but not satisfied
                    raise _ArgError("expecting value for %s" % optname)
                return ret
        elif self.action == "store_const":
            return self.const
        else:
            assert False
|
||||
|
||||
|
||||
def _dest_from_optnames(opt_names):
|
||||
dest = opt_names[0]
|
||||
for name in opt_names:
|
||||
if name.startswith("--"):
|
||||
dest = name
|
||||
break
|
||||
return dest.lstrip("-").replace("-", "_")
|
||||
|
||||
|
||||
class ArgumentParser:
    """Minimal work-alike of CPython's argparse.ArgumentParser.

    Supports optional and positional arguments, store/store_const/
    store_true/store_false actions, nargs, defaults, and -h/--help.
    Results are returned as a namedtuple rather than a Namespace.
    """

    def __init__(self, *, description=""):
        self.description = description
        self.opt = []  # optional arguments (_Arg instances)
        self.pos = []  # positional arguments (_Arg instances)

    def add_argument(self, *args, **kwargs):
        """Register one argument; *args* are option strings or the
        positional name, *kwargs* mirror a subset of argparse's."""
        # Map store_true/store_false onto store_const + const/default.
        action = kwargs.get("action", "store")
        if action == "store_true":
            action = "store_const"
            const = True
            default = kwargs.get("default", False)
        elif action == "store_false":
            action = "store_const"
            const = False
            default = kwargs.get("default", True)
        else:
            const = kwargs.get("const", None)
            default = kwargs.get("default", None)
        # A leading "-" marks an optional argument; else positional.
        if args and args[0].startswith("-"):
            list = self.opt
            dest = kwargs.get("dest")
            if dest is None:
                dest = _dest_from_optnames(args)
        else:
            list = self.pos
            dest = kwargs.get("dest")
            if dest is None:
                dest = args[0]
        if not args:
            # dest= was given with no names; use dest as the display name
            args = [dest]
        list.append(
            _Arg(args, dest, action, kwargs.get("nargs", None),
                 const, default, kwargs.get("help", "")))

    def usage(self, full):
        """Print the short usage line; with *full*, also per-arg help."""
        # print short usage
        print("usage: %s [-h]" % sys.argv[0], end="")

        def render_arg(arg):
            # Render the value placeholder for one argument, if any.
            if arg.action == "store":
                if arg.nargs is None:
                    return " %s" % arg.dest
                if isinstance(arg.nargs, int):
                    return " %s(x%d)" % (arg.dest, arg.nargs)
                else:
                    return " %s%s" % (arg.dest, arg.nargs)
            else:
                # store_const args take no value
                return ""
        for opt in self.opt:
            print(" [%s%s]" % (', '.join(opt.names), render_arg(opt)), end="")
        for pos in self.pos:
            print(render_arg(pos), end="")
        print()

        if not full:
            return

        # print full information
        print()
        if self.description:
            print(self.description)
        if self.pos:
            print("\npositional args:")
            for pos in self.pos:
                print(" %-16s%s" % (pos.names[0], pos.help))
        print("\noptional args:")
        print(" -h, --help show this message and exit")
        for opt in self.opt:
            print(" %-16s%s" % (', '.join(opt.names) + render_arg(opt), opt.help))

    def parse_args(self, args=None):
        """Parse *args* (default sys.argv[1:]); exit(2) on error."""
        return self._parse_args_impl(args, False)

    def parse_known_args(self, args=None):
        """Like parse_args() but also return unrecognized arguments."""
        return self._parse_args_impl(args, True)

    def _parse_args_impl(self, args, return_unknown):
        if args is None:
            args = sys.argv[1:]
        else:
            # work on a copy: parsing consumes the list in place
            args = args[:]
        try:
            return self._parse_args(args, return_unknown)
        except _ArgError as e:
            self.usage(False)
            print("error:", e)
            sys.exit(2)

    def _parse_args(self, args, return_unknown):
        # add optional args with defaults
        arg_dest = []
        arg_vals = []
        for opt in self.opt:
            arg_dest.append(opt.dest)
            arg_vals.append(opt.default)

        # deal with unknown arguments, if needed
        unknown = []
        def consume_unknown():
            # sweep values that belong to an unknown option
            while args and not args[0].startswith("-"):
                unknown.append(args.pop(0))

        # parse all args
        parsed_pos = False
        while args or not parsed_pos:
            if args and args[0].startswith("-") and args[0] != "-" and args[0] != "--":
                # optional arg
                a = args.pop(0)
                if a in ("-h", "--help"):
                    self.usage(True)
                    sys.exit(0)
                found = False
                for i, opt in enumerate(self.opt):
                    if a in opt.names:
                        arg_vals[i] = opt.parse(a, args)
                        found = True
                        break
                if not found:
                    if return_unknown:
                        unknown.append(a)
                        consume_unknown()
                    else:
                        raise _ArgError("unknown option %s" % a)
            else:
                # positional arg
                if parsed_pos:
                    # positionals already consumed once; leftovers are extras
                    if return_unknown:
                        unknown = unknown + args
                        break
                    else:
                        raise _ArgError("extra args: %s" % " ".join(args))
                for pos in self.pos:
                    arg_dest.append(pos.dest)
                    arg_vals.append(pos.parse(pos.names[0], args))
                parsed_pos = True
                if return_unknown:
                    consume_unknown()

        # build and return named tuple with arg values
        values = namedtuple("args", arg_dest)(*arg_vals)
        return (values, unknown) if return_unknown else values
|
|
@ -0,0 +1,151 @@
|
|||
import time
|
||||
import logging
|
||||
|
||||
|
||||
log = logging.getLogger("asyncio")
|
||||
|
||||
|
||||
# Workaround for not being able to subclass builtin types
class LoopStop(Exception):
    # Internal control-flow exception: raised by the callback queued by
    # EventLoop.stop() to break out of run_forever().
    pass

class InvalidStateError(Exception):
    # Raised by Future.result() when no result has been set yet.
    pass

# Object not matching any other object
_sentinel = []
|
||||
|
||||
|
||||
class EventLoop:
    """Minimal callback-queue event loop.

    Callbacks run FIFO; delayed calls are emulated with coroutines
    that use sleep() (see call_later).
    """

    def __init__(self):
        self.q = []  # pending (callable, args) pairs

    def call_soon(self, c, *args):
        """Queue *c* to be invoked with *args* on a later iteration."""
        self.q.append((c, args))

    def call_later(self, delay, c, *args):
        """Schedule *c* to run after *delay* seconds via a sleeping task."""
        def _delayed(c, args, delay):
            yield from sleep(delay)
            self.call_soon(c, *args)
        Task(_delayed(c, args, delay))

    def run_forever(self):
        """Run queued callbacks until LoopStop is raised.

        When the queue drains without a LoopStop, this idles forever.
        """
        while self.q:
            c = self.q.pop(0)
            try:
                c[0](*c[1])
            except LoopStop:
                return
        # I mean, forever
        while True:
            time.sleep(1)

    def stop(self):
        """Request loop exit by queueing a callback that raises LoopStop."""
        def _cb():
            raise LoopStop
        self.call_soon(_cb)

    def run_until_complete(self, coro):
        """Run the loop until *coro* completes, then stop it."""
        t = ensure_future(coro)
        t.add_done_callback(lambda a: self.stop())
        self.run_forever()

    def close(self):
        # Nothing to release in this implementation.
        pass
|
||||
|
||||
|
||||
# Process-wide default loop, shared by all default-constructed futures.
_def_event_loop = EventLoop()


class Future:
    """Minimal future: a result slot plus done-callbacks."""

    def __init__(self, loop=_def_event_loop):
        self.loop = loop
        self.res = _sentinel  # _sentinel means "no result yet"
        self.cbs = []         # callbacks to run once the result is set

    def result(self):
        """Return the result; raise InvalidStateError if not yet set."""
        if self.res is _sentinel:
            raise InvalidStateError
        return self.res

    def add_done_callback(self, fn):
        """Arrange fn(self) to run on completion (queued if already done)."""
        if self.res is _sentinel:
            self.cbs.append(fn)
        else:
            self.loop.call_soon(fn, self)

    def set_result(self, val):
        """Store *val* and invoke all registered callbacks synchronously."""
        self.res = val
        for f in self.cbs:
            f(self)
|
||||
|
||||
|
||||
class Task(Future):
    """Future that drives a generator-based coroutine one step per call."""

    def __init__(self, coro, loop=_def_event_loop):
        super().__init__()
        self.loop = loop
        self.c = coro
        # upstream asyncio forces task to be scheduled on instantiation
        self.loop.call_soon(self)

    def __call__(self):
        # Advance the coroutine one step; reschedule until exhausted.
        try:
            next(self.c)
            self.loop.call_soon(self)
        except StopIteration as e:
            log.debug("Coro finished: %s", self.c)
            # NOTE(review): the StopIteration value is discarded -- the
            # task result is always None; confirm no caller relies on
            # coroutine return values.
            self.set_result(None)
|
||||
|
||||
|
||||
def get_event_loop():
    """Return the process-wide default event loop."""
    return _def_event_loop
|
||||
|
||||
|
||||
# Decorator
def coroutine(f):
    """Identity decorator: plain generators serve as coroutines here."""
    return f
|
||||
|
||||
|
||||
def ensure_future(coro):
    """Wrap *coro* in a Task (which schedules it); Futures pass through."""
    if coro, Future):
        return coro
    return Task(coro)
|
||||
|
||||
|
||||
class _Wait(Future):
    """Future that completes after _done() has been called *n* times.

    Used by wait() to join a group of tasks.
    """

    def __init__(self, n):
        Future.__init__(self)
        self.n = n  # number of tasks still outstanding

    def _done(self):
        self.n -= 1
        log.debug("Wait: remaining tasks: %d", self.n)
        if not self.n:
            self.set_result(None)

    def __call__(self):
        # May be scheduled like a Task by the loop; nothing to advance.
        pass
|
||||
|
||||
|
||||
def wait(coro_list, loop=_def_event_loop):
    """Return a Future that completes when every coroutine in
    *coro_list* has finished.

    NOTE(review): *loop* is accepted for API compatibility but unused.
    """

    w = _Wait(len(coro_list))

    for c in coro_list:
        t = ensure_future(c)
        t.add_done_callback(lambda val: w._done())

    return w
|
||||
|
||||
|
||||
def sleep(secs):
    """Coroutine that completes after roughly *secs* seconds.

    Busy-waits in 10 ms slices, yielding to the loop between slices,
    so other queued callbacks still run while sleeping.
    """
    t = time.time()
    log.debug("Started sleep at: %s, targetting: %s", t, t + secs)
    while time.time() < t + secs:
        time.sleep(0.01)
        yield
    log.debug("Finished sleeping %ss", secs)
|
|
@ -0,0 +1,414 @@
|
|||
#! /usr/bin/env python3
|
||||
|
||||
"""RFC 3548: Base16, Base32, Base64 Data Encodings"""
|
||||
|
||||
# Modified 04-Oct-1995 by Jack Jansen to use binascii module
|
||||
# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
|
||||
# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere
|
||||
|
||||
import re
|
||||
import struct
|
||||
import binascii
|
||||
|
||||
|
||||
__all__ = [
|
||||
# Legacy interface exports traditional RFC 1521 Base64 encodings
|
||||
'encode', 'decode', 'encodebytes', 'decodebytes',
|
||||
# Generalized interface for other encodings
|
||||
'b64encode', 'b64decode', 'b32encode', 'b32decode',
|
||||
'b16encode', 'b16decode',
|
||||
# Standard Base64 encoding
|
||||
'standard_b64encode', 'standard_b64decode',
|
||||
# Some common Base64 alternatives. As referenced by RFC 3458, see thread
|
||||
# starting at:
|
||||
#
|
||||
# http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
|
||||
'urlsafe_b64encode', 'urlsafe_b64decode',
|
||||
]
|
||||
|
||||
|
||||
bytes_types = (bytes, bytearray) # Types acceptable as binary data
|
||||
|
||||
def _bytes_from_decode_data(s):
|
||||
if isinstance(s, str):
|
||||
try:
|
||||
return s.encode('ascii')
|
||||
# except UnicodeEncodeError:
|
||||
except:
|
||||
raise ValueError('string argument should contain only ASCII characters')
|
||||
elif isinstance(s, bytes_types):
|
||||
return s
|
||||
else:
|
||||
raise TypeError("argument should be bytes or ASCII string, not %s" % s.__class__.__name__)
|
||||
|
||||
|
||||
|
||||
# Base64 encoding/decoding uses binascii
|
||||
|
||||
def b64encode(s, altchars=None):
    """Encode the byte string *s* using Base64 and return bytes.

    Optional *altchars* is a 2-byte string giving replacements for the
    '+' and '/' characters (e.g. to build URL- or filesystem-safe
    output).
    """
    if not isinstance(s, (bytes, bytearray)):
        raise TypeError("expected bytes, not %s" % s.__class__.__name__)
    # b2a_base64 appends a trailing newline; strip it off.
    raw = binascii.b2a_base64(s)[:-1]
    if altchars is None:
        return raw
    if not isinstance(altchars, (bytes, bytearray)):
        raise TypeError("expected bytes, not %s"
                        % altchars.__class__.__name__)
    assert len(altchars) == 2, repr(altchars)
    return raw.translate(bytes.maketrans(b'+/', altchars))
|
||||
|
||||
|
||||
def b64decode(s, altchars=None, validate=False):
    """Decode Base64 data *s* (bytes or ASCII str) and return bytes.

    *altchars*: optional 2-character alternative alphabet used in the
    input for '+' and '/'. *validate*: when True, reject characters
    outside the alphabet instead of silently discarding them.

    Raises binascii.Error on incorrect padding or (with validate)
    non-alphabet characters.
    """
    data = _bytes_from_decode_data(s)
    if altchars is not None:
        alt = _bytes_from_decode_data(altchars)
        assert len(alt) == 2, repr(alt)
        # Normalize back to the standard alphabet before decoding.
        data = data.translate(bytes.maketrans(alt, b'+/'))
    if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', data):
        raise binascii.Error('Non-base64 digit found')
    return binascii.a2b_base64(data)
|
||||
|
||||
|
||||
def standard_b64encode(s):
    """Encode byte string *s* with the standard Base64 alphabet."""
    return b64encode(s)

def standard_b64decode(s):
    """Decode *s* from the standard Base64 alphabet.

    Raises binascii.Error on incorrect padding or non-alphabet
    characters in the input.
    """
    return b64decode(s)
|
||||
|
||||
|
||||
#_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
#_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')

def urlsafe_b64encode(s):
    """Encode *s* with the URL-safe Base64 alphabet ('-' and '_').

    Not implemented in this port; the commented-out translation tables
    above show the intended CPython implementation.
    """
    # return b64encode(s).translate(_urlsafe_encode_translation)
    raise NotImplementedError()

def urlsafe_b64decode(s):
    """Decode *s* from the URL-safe Base64 alphabet ('-' and '_').

    Not implemented in this port.
    """
    # s = _bytes_from_decode_data(s)
    # s = s.translate(_urlsafe_decode_translation)
    # return b64decode(s)
    raise NotImplementedError()
|
||||
|
||||
|
||||
|
||||
# Base32 encoding/decoding must be done in Python
_b32alphabet = {
    0: b'A', 9: b'J', 18: b'S', 27: b'3',
    1: b'B', 10: b'K', 19: b'T', 28: b'4',
    2: b'C', 11: b'L', 20: b'U', 29: b'5',
    3: b'D', 12: b'M', 21: b'V', 30: b'6',
    4: b'E', 13: b'N', 22: b'W', 31: b'7',
    5: b'F', 14: b'O', 23: b'X',
    6: b'G', 15: b'P', 24: b'Y',
    7: b'H', 16: b'Q', 25: b'Z',
    8: b'I', 17: b'R', 26: b'2',
    }

# Forward table: 5-bit value -> alphabet byte; reverse: byte -> value.
_b32tab = [v[0] for k, v in sorted(_b32alphabet.items())]
_b32rev = dict([(v[0], k) for k, v in _b32alphabet.items()])


def b32encode(s):
    """Encode a byte string using Base32.

    s is the byte string to encode. The encoded byte string is returned.
    """
    if not isinstance(s, bytes_types):
        raise TypeError("expected bytes, not %s" % s.__class__.__name__)
    quanta, leftover = divmod(len(s), 5)
    # Pad the last quantum with zero bits if necessary
    if leftover:
        s = s + bytes(5 - leftover) # Don't use += !
        quanta += 1
    encoded = bytearray()
    for i in range(quanta):
        # c1 and c2 are 16 bits wide, c3 is 8 bits wide. The intent of this
        # code is to process the 40 bits in units of 5 bits. So we take the 1
        # leftover bit of c1 and tack it onto c2. Then we take the 2 leftover
        # bits of c2 and tack them onto c3. The shifts and masks are intended
        # to give us values of exactly 5 bits in width.
        c1, c2, c3 = struct.unpack('!HHB', s[i*5:(i+1)*5])
        c2 += (c1 & 1) << 16 # 17 bits wide
        c3 += (c2 & 3) << 8 # 10 bits wide
        encoded += bytes([_b32tab[c1 >> 11], # bits 1 - 5
                          _b32tab[(c1 >> 6) & 0x1f], # bits 6 - 10
                          _b32tab[(c1 >> 1) & 0x1f], # bits 11 - 15
                          _b32tab[c2 >> 12], # bits 16 - 20 (1 - 5)
                          _b32tab[(c2 >> 7) & 0x1f], # bits 21 - 25 (6 - 10)
                          _b32tab[(c2 >> 2) & 0x1f], # bits 26 - 30 (11 - 15)
                          _b32tab[c3 >> 5], # bits 31 - 35 (1 - 5)
                          _b32tab[c3 & 0x1f], # bits 36 - 40 (1 - 5)
                          ])
    # Adjust for any leftover partial quanta
    if leftover == 1:
        encoded = encoded[:-6] + b'======'
    elif leftover == 2:
        encoded = encoded[:-4] + b'===='
    elif leftover == 3:
        encoded = encoded[:-3] + b'==='
    elif leftover == 4:
        encoded = encoded[:-1] + b'='
    return bytes(encoded)
|
||||
|
||||
|
||||
def b32decode(s, casefold=False, map01=None):
    """Decode a Base32 encoded byte string.

    s is the byte string to decode. Optional casefold is a flag
    specifying whether a lowercase alphabet is acceptable as input.
    For security purposes, the default is False.

    RFC 3548 allows for optional mapping of the digit 0 (zero) to the
    letter O (oh), and for optional mapping of the digit 1 (one) to
    either the letter I (eye) or letter L (el). The optional argument
    map01 when not None, specifies which letter the digit 1 should be
    mapped to (when map01 is not None, the digit 0 is always mapped to
    the letter O). For security purposes the default is None, so that
    0 and 1 are not allowed in the input.

    The decoded byte string is returned. binascii.Error is raised if
    the input is incorrectly padded or if there are non-alphabet
    characters present in the input.
    """
    s = _bytes_from_decode_data(s)
    quanta, leftover = divmod(len(s), 8)
    if leftover:
        raise binascii.Error('Incorrect padding')
    # Handle section 2.4 zero and one mapping. The flag map01 will be either
    # False, or the character to map the digit 1 (one) to. It should be
    # either L (el) or I (eye).
    if map01 is not None:
        map01 = _bytes_from_decode_data(map01)
        assert len(map01) == 1, repr(map01)
        s = s.translate(bytes.maketrans(b'01', b'O' + map01))
    if casefold:
        s = s.upper()
    # Strip off pad characters from the right. We need to count the pad
    # characters because this will tell us how many null bytes to remove from
    # the end of the decoded string.
    padchars = s.find(b'=')
    if padchars > 0:
        padchars = len(s) - padchars
        s = s[:-padchars]
    else:
        padchars = 0

    # Now decode the full quanta: accumulate 8 x 5-bit values into a
    # 40-bit integer, then emit it as 5 bytes via unhexlify.
    parts = []
    acc = 0
    shift = 35
    for c in s:
        val = _b32rev.get(c)
        if val is None:
            raise binascii.Error('Non-base32 digit found')
        acc += _b32rev[c] << shift
        shift -= 5
        if shift < 0:
            parts.append(binascii.unhexlify(bytes('%010x' % acc, "ascii")))
            acc = 0
            shift = 35
    # Process the last, partial quanta
    last = binascii.unhexlify(bytes('%010x' % acc, "ascii"))
    if padchars == 0:
        last = b'' # No characters
    elif padchars == 1:
        last = last[:-1]
    elif padchars == 3:
        last = last[:-2]
    elif padchars == 4:
        last = last[:-3]
    elif padchars == 6:
        last = last[:-4]
    else:
        raise binascii.Error('Incorrect padding')
    parts.append(last)
    return b''.join(parts)
|
||||
|
||||
|
||||
|
||||
# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
# lowercase. The RFC also recommends against accepting input case
# insensitively.
def b16encode(s):
    """Encode byte string *s* as uppercase hexadecimal (Base16) bytes."""
    if not isinstance(s, (bytes, bytearray)):
        raise TypeError("expected bytes, not %s" % s.__class__.__name__)
    return binascii.hexlify(s).upper()
|
||||
|
||||
|
||||
def b16decode(s, casefold=False):
    """Decode Base16 data *s* (bytes or ASCII str) and return bytes.

    With *casefold*, lowercase hex digits are accepted. Raises
    binascii.Error when any character outside [0-9A-F] remains after
    the optional casefold.
    """
    data = _bytes_from_decode_data(s)
    if casefold:
        data = data.upper()
    if re.search(b'[^0-9A-F]', data):
        raise binascii.Error('Non-base16 digit found')
    return binascii.unhexlify(data)
|
||||
|
||||
|
||||
|
||||
# Legacy interface. This code could be cleaned up since I don't believe
# binascii has any line length limitations. It just doesn't seem worth it
# though. The files should be opened in binary mode.

MAXLINESIZE = 76 # Excluding the CRLF
MAXBINSIZE = (MAXLINESIZE//4)*3


def encode(input, output):
    """Encode a file; input and output are binary files."""
    while True:
        chunk = input.read(MAXBINSIZE)
        if not chunk:
            break
        # Top up a short read so every output line but the last is
        # the full MAXLINESIZE width.
        while len(chunk) < MAXBINSIZE:
            more = input.read(MAXBINSIZE - len(chunk))
            if not more:
                break
            chunk += more
        output.write(binascii.b2a_base64(chunk))
|
||||
|
||||
|
||||
def decode(input, output):
    """Decode a file; input and output are binary files."""
    # readline() returns b'' at EOF, which ends the iteration.
    for line in iter(input.readline, b''):
        output.write(binascii.a2b_base64(line))
|
||||
|
||||
|
||||
def encodebytes(s):
    """Encode bytestring *s* as multi-line base-64 data (one newline-
    terminated line per MAXBINSIZE input chunk)."""
    if not isinstance(s, (bytes, bytearray)):
        raise TypeError("expected bytes, not %s" % s.__class__.__name__)
    return b"".join(binascii.b2a_base64(s[i:i + MAXBINSIZE])
                    for i in range(0, len(s), MAXBINSIZE))
|
||||
|
||||
def encodestring(s):
    """Legacy alias of encodebytes()."""
    # Deferred import keeps module import light on MicroPython.
    import warnings
    warnings.warn("encodestring() is a deprecated alias, use encodebytes()",
                  DeprecationWarning, 2)
    return encodebytes(s)
|
||||
|
||||
|
||||
def decodebytes(s):
    """Decode a bytestring of base-64 data into a bytestring."""
    if not isinstance(s, (bytes, bytearray)):
        raise TypeError("expected bytes, not %s" % s.__class__.__name__)
    return binascii.a2b_base64(s)
|
||||
|
||||
def decodestring(s):
    """Legacy alias of decodebytes()."""
    # Deferred import keeps module import light on MicroPython.
    import warnings
    warnings.warn("decodestring() is a deprecated alias, use decodebytes()",
                  DeprecationWarning, 2)
    return decodebytes(s)
|
||||
|
||||
|
||||
# Usable as a script...
def main():
    """Small main program: encode/decode stdin or a file per flags.

    -d / -u decode, -e encode (default), -t runs the self-test.
    """
    import sys, getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error as msg:
        sys.stdout = sys.stderr
        print(msg)
        print("""usage: %s [-d|-e|-u|-t] [file|-]
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0])
        sys.exit(2)
    func = encode
    # Last matching flag wins; -t runs the self-test and exits.
    for o, a in opts:
        if o == '-e': func = encode
        if o == '-d': func = decode
        if o == '-u': func = decode
        if o == '-t': test(); return
    if args and args[0] != '-':
        with open(args[0], 'rb') as f:
            func(f, sys.stdout.buffer)
    else:
        # No file argument (or "-"): filter stdin to stdout.
        func(sys.stdin.buffer, sys.stdout.buffer)
|
||||
|
||||
|
||||
def test():
    """Round-trip self-test for encodebytes()/decodebytes()."""
    s0 = b"Aladdin:open sesame"
    print(repr(s0))
    s1 = encodebytes(s0)
    print(repr(s1))
    s2 = decodebytes(s1)
    print(repr(s2))
    assert s0 == s2
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -0,0 +1,38 @@
|
|||
#
# This is validation script for "boom" tool https://github.com/tarekziade/boom
# To use it:
#
# boom -n1000 --post-hook=boom_uasyncio.validate <rest of boom args>
#
# Note that if you'll use other -n value, you should update NUM_REQS below
# to match.
#

# Total number of requests boom is expected to make (keep in sync with -n).
NUM_REQS = 1000
# Request sequence numbers observed so far, in arrival order.
seen = []
# How many responses have been validated.
cnt = 0

def validate(resp):
    """Boom post-hook: validate one response from test_http_server_heavy.py.

    Each response body is a numbered "Hello. NNNNNNN\\r\\n" line repeated
    400101 times (1 + 100 + 400000 writes on the server side) followed by
    an "=== END ===" marker.  Once all NUM_REQS responses have been seen,
    verify the sequence numbers form one contiguous run.
    Returns *resp* unchanged, as boom post-hooks must.
    """
    global cnt
    t = resp.text
    first_line = t.split("\r\n", 1)[0]
    no = int(first_line.split()[1])
    seen.append(no)
    c = t.count(first_line + "\r\n")
    assert c == 400101, str(c)
    assert t.endswith("=== END ===")

    cnt += 1
    if cnt == NUM_REQS:
        seen.sort()
        # BUG FIX: these were Python 2 print *statements* (a SyntaxError on
        # Python 3, which boom runs under); converted to function calls.
        print()
        print(seen)
        print()
        expected = None
        for i in seen:
            if expected is None:
                expected = i
            else:
                expected += 1
            assert i == expected
    return resp
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/sh
#
# This in one-shot scripts to test "light load" uasyncio HTTP server using
# Apache Bench (ab).
#

# Alternative servers/interpreters, kept for reference:
#python3.4.2 test_http_server_light.py &
#micropython -O test_http_server_light.py &

#python3.4.2 test_http_server_medium.py &
# NOTE(review): "200wK" reads as MicroPython heap syntax for 200K machine
# words — confirm against the installed micropython's -X heapsize parsing.
micropython -O -X heapsize=200wK test_http_server_medium.py &

# Give the server a moment to start listening.
sleep 1

# 10k requests at concurrency 100 against the test server.
ab -n10000 -c100 http://127.0.0.1:8081/

# Stop the background server.
kill %1
|
@ -0,0 +1,28 @@
|
|||
#!/bin/sh
#
# This in one-shot scripts to test "heavy load" uasyncio HTTP server using
# Boom tool https://github.com/tarekziade/boom .
#
# Note that this script doesn't test performance, but rather test functional
# correctness of uasyncio server implementation, while serving large amounts
# of data (guaranteedly more than a socket buffer). Thus, this script should
# not be used for benchmarking.
#

# Create (or reuse) a virtualenv with boom installed from git.
if [ ! -d .venv-boom ]; then
    virtualenv .venv-boom
    . .venv-boom/bin/activate
    # PyPI currently has 0.8 which is too old
    #pip install boom
    pip install git+https://github.com/tarekziade/boom
else
    . .venv-boom/bin/activate
fi


# Large heap: the server builds multi-megabyte response bodies.
micropython -X heapsize=300000000 -O test_http_server_heavy.py &
sleep 1

# PYTHONPATH=. lets boom import the boom_uasyncio post-hook module from here.
PYTHONPATH=. boom -n1000 -c30 http://localhost:8081 --post-hook=boom_uasyncio.validate

# Stop the background server.
kill %1
|
@ -0,0 +1,40 @@
|
|||
# "Heavy load" HTTP test server: every response carries ~6MB of numbered
# lines, guaranteeing writes overflow the socket buffer.  Responses are
# checked by boom_uasyncio.validate.
import uasyncio as asyncio
import signal
import errno


# Per-connection sequence number embedded in each response body.
cnt = 0

@asyncio.coroutine
def serve(reader, writer):
    global cnt
    #s = "Hello.\r\n"
    s = "Hello. %07d\r\n" % cnt
    cnt += 1
    yield from reader.read()
    yield from writer.awrite("HTTP/1.0 200 OK\r\n\r\n")
    try:
        # 1 + 100 + 400000 copies of the line; the validator counts 400101.
        yield from writer.awrite(s)
        yield from writer.awrite(s * 100)
        yield from writer.awrite(s * 400000)
        yield from writer.awrite("=== END ===")
    except OSError as e:
        # Clients may disconnect mid-transfer; broken pipe / reset is benign.
        if e.args[0] == errno.EPIPE:
            print("EPIPE")
        elif e.args[0] == errno.ECONNRESET:
            print("ECONNRESET")
        else:
            raise
    finally:
        yield from writer.aclose()


import logging
logging.basicConfig(level=logging.INFO)
#logging.basicConfig(level=logging.DEBUG)
# Ignore SIGPIPE so a dropped client surfaces as OSError(EPIPE), not a signal.
signal.signal(signal.SIGPIPE, signal.SIG_IGN)
loop = asyncio.get_event_loop()
#mem_info()
# NOTE(review): scheduling a coroutine with call_soon() is uasyncio-specific;
# CPython asyncio would need ensure_future/create_task — confirm intended.
loop.call_soon(asyncio.start_server(serve, "0.0.0.0", 8081, backlog=100))
loop.run_forever()
loop.close()
|
@ -0,0 +1,21 @@
|
|||
# "Light load" HTTP test server: minimal fixed response per request.
import uasyncio as asyncio


@asyncio.coroutine
def serve(reader, writer):
    #print(reader, writer)
    #print("================")
    # Read (and discard) up to 512 bytes of request headers.
    yield from reader.read(512)
    yield from writer.awrite("HTTP/1.0 200 OK\r\n\r\nHello.\r\n")
    yield from writer.aclose()
    #print("Finished processing request")


import logging
#logging.basicConfig(level=logging.INFO)
logging.basicConfig(level=logging.DEBUG)
loop = asyncio.get_event_loop()
#mem_info()
loop.create_task(asyncio.start_server(serve, "127.0.0.1", 8081, backlog=100))
loop.run_forever()
loop.close()
|
@ -0,0 +1,22 @@
|
|||
# "Medium load" HTTP test server: ~12KB response, larger than one typical
# socket-buffer write.
import uasyncio as asyncio

# Response body built once and reused for every request.
resp = "HTTP/1.0 200 OK\r\n\r\n" + "Hello.\r\n" * 1500

@asyncio.coroutine
def serve(reader, writer):
    #print(reader, writer)
    #print("================")
    # Read (and discard) up to 512 bytes of request headers.
    yield from reader.read(512)
    yield from writer.awrite(resp)
    yield from writer.aclose()
    #print("Finished processing request")


import logging
#logging.basicConfig(level=logging.INFO)
logging.basicConfig(level=logging.DEBUG)
# NOTE(review): the 80 is presumably uasyncio's run-queue length, not a port —
# CPython's get_event_loop() takes no argument; confirm against uasyncio API.
loop = asyncio.get_event_loop(80)
#mem_info()
loop.create_task(asyncio.start_server(serve, "127.0.0.1", 8081, backlog=100))
loop.run_forever()
loop.close()
|
@ -0,0 +1,113 @@
|
|||
# Pure-Python complement to MicroPython's built-in ubinascii: re-export the
# native functions and fill in anything the port left out.
from ubinascii import *

if not "unhexlify" in globals():
    def unhexlify(data):
        """Convert a string of hex digits into the bytes it encodes."""
        if len(data) % 2 != 0:
            raise ValueError("Odd-length string")

        return bytes([ int(data[i:i+2], 16) for i in range(0, len(data), 2) ])

# CPython-compatible aliases.
b2a_hex = hexlify
a2b_hex = unhexlify
|
||||
# ____________________________________________________________

# Base64 padding character.
PAD = '='

# Maps an ASCII code to its 6-bit base64 value; -1 marks characters outside
# the alphabet (the decoder skips them).
table_a2b_base64 = [
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1, # Note PAD->-1 here
    -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
]
def _transform(n):
    # Encode "invalid" as '\xff' so the decoder can test a single byte value.
    if n == -1:
        return '\xff'
    else:
        return chr(n)
# Pack the list into a 256-char string for compact indexing by ord(c).
table_a2b_base64 = ''.join(map(_transform, table_a2b_base64))
assert len(table_a2b_base64) == 256
||||
|
||||
def a2b_base64(ascii):
    "Decode a line of base64 data."

    res = []
    quad_pos = 0               # position within the current group of 4 symbols
    leftchar = 0               # bit accumulator
    leftbits = 0               # number of valid bits held in leftchar
    last_char_was_a_pad = False

    for c in ascii:
        c = chr(c)
        if c == PAD:
            if quad_pos > 2 or (quad_pos == 2 and last_char_was_a_pad):
                break      # stop on 'xxx=' or on 'xx=='
            last_char_was_a_pad = True
        else:
            n = ord(table_a2b_base64[ord(c)])
            if n == 0xff:
                continue    # ignore strange characters
            #
            # Shift it in on the low end, and see if there's
            # a byte ready for output.
            quad_pos = (quad_pos + 1) & 3
            leftchar = (leftchar << 6) | n
            leftbits += 6
            #
            if leftbits >= 8:
                leftbits -= 8
                res.append((leftchar >> leftbits).to_bytes(1, 'big'))
                leftchar &= ((1 << leftbits) - 1)
            #
            last_char_was_a_pad = False
    else:
        # Loop exhausted without hitting padding: leftover bits mean the
        # input length was not a multiple of four symbols.
        if leftbits != 0:
            raise Exception("Incorrect padding")

    return b''.join(res)
||||
|
||||
# ____________________________________________________________

# The 64-character base64 alphabet, indexed by 6-bit value.
table_b2a_base64 = (
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")

def b2a_base64(bin):
    "Base64-code line of data."

    # NOTE(review): newlength is computed but never used — dead code kept as-is.
    newlength = (len(bin) + 2) // 3
    newlength = newlength * 4 + 1
    res = []

    leftchar = 0   # bit accumulator
    leftbits = 0   # number of valid bits held in leftchar
    for c in bin:
        # Shift into our buffer, and output any 6bits ready
        leftchar = (leftchar << 8) | c
        leftbits += 8
        res.append(table_b2a_base64[(leftchar >> (leftbits-6)) & 0x3f])
        leftbits -= 6
        if leftbits >= 6:
            res.append(table_b2a_base64[(leftchar >> (leftbits-6)) & 0x3f])
            leftbits -= 6
    #
    # Flush remaining bits and pad the output to a multiple of four symbols.
    if leftbits == 2:
        res.append(table_b2a_base64[(leftchar & 3) << 4])
        res.append(PAD)
        res.append(PAD)
    elif leftbits == 4:
        res.append(table_b2a_base64[(leftchar & 0xf) << 2])
        res.append(PAD)
    res.append('\n')
    return ''.join(res).encode('ascii')
|
@ -0,0 +1,92 @@
|
|||
"""Bisection algorithms."""
|
||||
|
||||
def insort_right(a, x, lo=0, hi=None):
    """Insert item x in list a, and keep it sorted assuming a is sorted.

    If x is already in a, insert it to the right of the rightmost x.

    Optional args lo (default 0) and hi (default len(a)) bound the
    slice of a to be searched.
    """
    if lo < 0:
        raise ValueError('lo must be non-negative')
    if hi is None:
        hi = len(a)
    # Binary search for the rightmost valid insertion point, then insert.
    while lo < hi:
        middle = lo + (hi - lo) // 2
        if x < a[middle]:
            hi = middle
        else:
            lo = middle + 1
    a.insert(lo, x)

insort = insort_right # backward compatibility
||||
|
||||
def bisect_right(a, x, lo=0, hi=None):
    """Return the index where to insert item x in list a, assuming a is sorted.

    The return value i is such that all e in a[:i] have e <= x, and all e in
    a[i:] have e > x.  So if x already appears in the list, a.insert(x) will
    insert just after the rightmost x already there.

    Optional args lo (default 0) and hi (default len(a)) bound the
    slice of a to be searched.
    """
    if lo < 0:
        raise ValueError('lo must be non-negative')
    if hi is None:
        hi = len(a)
    # Narrow [lo, hi) until it collapses onto the insertion point.
    while lo < hi:
        middle = lo + (hi - lo) // 2
        if x < a[middle]:
            hi = middle
        else:
            lo = middle + 1
    return lo

bisect = bisect_right # backward compatibility
||||
|
||||
def insort_left(a, x, lo=0, hi=None):
    """Insert item x in list a, and keep it sorted assuming a is sorted.

    If x is already in a, insert it to the left of the leftmost x.

    Optional args lo (default 0) and hi (default len(a)) bound the
    slice of a to be searched.
    """
    if lo < 0:
        raise ValueError('lo must be non-negative')
    if hi is None:
        hi = len(a)
    # Binary search for the leftmost valid insertion point, then insert.
    while lo < hi:
        middle = lo + (hi - lo) // 2
        if a[middle] < x:
            lo = middle + 1
        else:
            hi = middle
    a.insert(lo, x)
||||
|
||||
def bisect_left(a, x, lo=0, hi=None):
    """Return the index where to insert item x in list a, assuming a is sorted.

    The return value i is such that all e in a[:i] have e < x, and all e in
    a[i:] have e >= x.  So if x already appears in the list, a.insert(x) will
    insert just before the leftmost x already there.

    Optional args lo (default 0) and hi (default len(a)) bound the
    slice of a to be searched.
    """
    if lo < 0:
        raise ValueError('lo must be non-negative')
    if hi is None:
        hi = len(a)
    # Narrow [lo, hi) until it collapses onto the insertion point.
    while lo < hi:
        middle = lo + (hi - lo) // 2
        if a[middle] < x:
            lo = middle + 1
        else:
            hi = middle
    return lo
||||
|
||||
# Overwrite above definitions with a fast C implementation
|
||||
try:
|
||||
from _bisect import *
|
||||
except ImportError:
|
||||
pass
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,337 @@
|
|||
"""A generic class to build line-oriented command interpreters.
|
||||
|
||||
Interpreters constructed with this class obey the following conventions:
|
||||
|
||||
1. End of file on input is processed as the command 'EOF'.
|
||||
2. A command is parsed out of each line by collecting the prefix composed
|
||||
of characters in the identchars member.
|
||||
3. A command `foo' is dispatched to a method 'do_foo()'; the do_ method
|
||||
is passed a single argument consisting of the remainder of the line.
|
||||
4. Typing an empty line repeats the last command. (Actually, it calls the
|
||||
method `emptyline', which may be overridden in a subclass.)
|
||||
5. There is a predefined `help' method. Given an argument `topic', it
|
||||
calls the command `help_topic'. With no arguments, it lists all topics
|
||||
with defined help_ functions, broken into up to three topics; documented
|
||||
commands, miscellaneous help topics, and undocumented commands.
|
||||
6. The command '?' is a synonym for `help'. The command '!' is a synonym
|
||||
for `shell', if a do_shell method exists.
|
||||
7. If completion is enabled, completing commands will be done automatically,
|
||||
and completing of commands args is done by calling complete_foo() with
|
||||
arguments text, line, begidx, endidx. text is string we are matching
|
||||
against, all returned matches must begin with it. line is the current
|
||||
input line (lstripped), begidx and endidx are the beginning and end
|
||||
indexes of the text being matched, which could be used to provide
|
||||
different completion depending upon which position the argument is in.
|
||||
|
||||
The `default' method may be overridden to intercept commands for which there
|
||||
is no do_ method.
|
||||
|
||||
The `completedefault' method may be overridden to intercept completions for
|
||||
commands that have no complete_ method.
|
||||
|
||||
The data member `self.ruler' sets the character used to draw separator lines
|
||||
in the help messages. If empty, no ruler line is drawn. It defaults to "=".
|
||||
|
||||
If the value of `self.intro' is nonempty when the cmdloop method is called,
|
||||
it is printed out on interpreter startup. This value may be overridden
|
||||
via an optional argument to the cmdloop() method.
|
||||
|
||||
The data members `self.doc_header', `self.misc_header', and
|
||||
`self.undoc_header' set the headers used for the help function's
|
||||
listings of documented functions, miscellaneous topics, and undocumented
|
||||
functions respectively.
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
This is a copy of python's Cmd, but leaves out features that aren't relevant
|
||||
or can't currently be implemented for MicroPython.
|
||||
|
||||
One of the notable deviations is that since MicroPython strips doc strings,
|
||||
this means that that help by doc string feature doesn't work.
|
||||
|
||||
completions have also been stripped out.
|
||||
"""
|
||||
|
||||
#import string, sys
|
||||
import sys # MiroPython doesn't yet have a string module
|
||||
|
||||
# Public API of this module.
__all__ = ["Cmd"]

# Default interactive prompt.
PROMPT = '(Cmd) '
#IDENTCHARS = string.ascii_letters + string.digits + '_'
# Characters allowed in a command word; spelled out literally because
# MicroPython has no string module (see commented-out line above).
IDENTCHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
||||
|
||||
class Cmd:
    """A simple framework for writing line-oriented command interpreters.

    These are often useful for test harnesses, administrative tools, and
    prototypes that will later be wrapped in a more sophisticated interface.

    A Cmd instance or subclass instance is a line-oriented interpreter
    framework.  There is no good reason to instantiate Cmd itself; rather,
    it's useful as a superclass of an interpreter class you define yourself
    in order to inherit Cmd's methods and encapsulate action methods.

    """
    # Class-level defaults; subclasses commonly override these.
    prompt = PROMPT
    identchars = IDENTCHARS
    ruler = '='
    lastcmd = ''
    intro = None
    doc_leader = ""
    doc_header = "Documented commands (type help <topic>):"
    misc_header = "Miscellaneous help topics:"
    undoc_header = "Undocumented commands:"
    nohelp = "*** No help on %s"
    use_rawinput = 1

    def __init__(self, stdin=None, stdout=None):
        """Instantiate a line-oriented interpreter framework.

        The optional arguments stdin and stdout
        specify alternate input and output file objects; if not specified,
        sys.stdin and sys.stdout are used.

        """
        if stdin is not None:
            self.stdin = stdin
        else:
            self.stdin = sys.stdin
        if stdout is not None:
            self.stdout = stdout
        else:
            self.stdout = sys.stdout
        self.cmdqueue = []

    def cmdloop(self, intro=None):
        """Repeatedly issue a prompt, accept input, parse an initial prefix
        off the received input, and dispatch to action methods, passing them
        the remainder of the line as argument.

        """
        self.preloop()
        try:
            if intro is not None:
                self.intro = intro
            if self.intro:
                self.stdout.write(str(self.intro)+"\n")
            stop = None
            while not stop:
                if self.cmdqueue:
                    # Queued commands run before prompting the user again.
                    line = self.cmdqueue.pop(0)
                else:
                    if self.use_rawinput:
                        try:
                            line = input(self.prompt)
                        except EOFError:
                            line = 'EOF'
                    else:
                        self.stdout.write(self.prompt)
                        self.stdout.flush()
                        line = self.stdin.readline()
                        if not len(line):
                            # Empty read means end of the input stream.
                            line = 'EOF'
                        else:
                            line = line.rstrip('\r\n')
                line = self.precmd(line)
                stop = self.onecmd(line)
                stop = self.postcmd(stop, line)
            self.postloop()
        finally:
            pass

    def precmd(self, line):
        """Hook method executed just before the command line is
        interpreted, but after the input prompt is generated and issued.

        """
        return line

    def postcmd(self, stop, line):
        """Hook method executed just after a command dispatch is finished."""
        return stop

    def preloop(self):
        """Hook method executed once when the cmdloop() method is called."""
        pass

    def postloop(self):
        """Hook method executed once when the cmdloop() method is about to
        return.

        """
        pass

    def parseline(self, line):
        """Parse the line into a command name and a string containing
        the arguments.  Returns a tuple containing (command, args, line).
        'command' and 'args' may be None if the line couldn't be parsed.
        """
        line = line.strip()
        if not line:
            return None, None, line
        elif line[0] == '?':
            # '?' is a synonym for 'help'.
            line = 'help ' + line[1:]
        elif line[0] == '!':
            # '!' is a synonym for 'shell', when a do_shell method exists.
            if hasattr(self, 'do_shell'):
                line = 'shell ' + line[1:]
            else:
                return None, None, line
        i, n = 0, len(line)
        while i < n and line[i] in self.identchars: i = i+1
        cmd, arg = line[:i], line[i:].strip()
        return cmd, arg, line

    def onecmd(self, line):
        """Interpret the argument as though it had been typed in response
        to the prompt.

        This may be overridden, but should not normally need to be;
        see the precmd() and postcmd() methods for useful execution hooks.
        The return value is a flag indicating whether interpretation of
        commands by the interpreter should stop.

        """
        cmd, arg, line = self.parseline(line)
        if not line:
            return self.emptyline()
        if cmd is None:
            return self.default(line)
        self.lastcmd = line
        if line == 'EOF':
            # EOF should not be repeated by an empty follow-up line.
            self.lastcmd = ''
        if cmd == '':
            return self.default(line)
        else:
            try:
                func = getattr(self, 'do_' + cmd)
            except AttributeError:
                return self.default(line)
            return func(arg)

    def emptyline(self):
        """Called when an empty line is entered in response to the prompt.

        If this method is not overridden, it repeats the last nonempty
        command entered.

        """
        if self.lastcmd:
            return self.onecmd(self.lastcmd)

    def default(self, line):
        """Called on an input line when the command prefix is not recognized.

        If this method is not overridden, it prints an error message and
        returns.

        """
        self.stdout.write('*** Unknown syntax: %s\n'%line)

    def get_names(self):
        # This method used to pull in base class attributes
        # at a time dir() didn't do it yet.
        return dir(self.__class__)

    def do_help(self, arg):
        'List available commands with "help" or detailed help with "help cmd".'
        if arg:
            # XXX check arg syntax
            try:
                func = getattr(self, 'help_' + arg)
            except AttributeError:
                self.stdout.write("%s\n"%str(self.nohelp % (arg,)))
                return
            func()
        else:
            names = self.get_names()
            cmds_doc = []
            cmds_undoc = []
            help = {}
            # Record which topics have dedicated help_* methods.
            for name in names:
                if name[:5] == 'help_':
                    help[name[5:]]=1
            names.sort()
            # There can be duplicates if routines overridden
            prevname = ''
            for name in names:
                if name[:3] == 'do_':
                    if name == prevname:
                        continue
                    prevname = name
                    cmd=name[3:]
                    if cmd in help:
                        cmds_doc.append(cmd)
                        del help[cmd]
                    else:
                        cmds_undoc.append(cmd)
            self.stdout.write("%s\n"%str(self.doc_leader))
            self.print_topics(self.doc_header, cmds_doc, 15,80)
            self.print_topics(self.misc_header, list(help.keys()),15,80)
            self.print_topics(self.undoc_header, cmds_undoc, 15,80)

    def print_topics(self, header, cmds, cmdlen, maxcol):
        # Print one help section: header, optional ruler line, then the
        # command names laid out in columns.
        if cmds:
            self.stdout.write("%s\n"%str(header))
            if self.ruler:
                self.stdout.write("%s\n"%str(self.ruler * len(header)))
            self.columnize(cmds, maxcol-1)
            self.stdout.write("\n")

    def columnize(self, list, displaywidth=80):
        """Display a list of strings as a compact set of columns.

        Each column is only as wide as necessary.
        Columns are separated by two spaces (one was not legible enough).
        """
        # NOTE: parameter shadows the builtin `list` (kept for CPython parity).
        if not list:
            self.stdout.write("<empty>\n")
            return

        nonstrings = [i for i in range(len(list))
                        if not isinstance(list[i], str)]
        if nonstrings:
            raise TypeError("list[i] not a string for i in %s"
                            % ", ".join(map(str, nonstrings)))
        size = len(list)
        if size == 1:
            self.stdout.write('%s\n'%str(list[0]))
            return
        # Try every row count from 1 upwards
        for nrows in range(1, len(list)):
            ncols = (size+nrows-1) // nrows
            colwidths = []
            totwidth = -2
            for col in range(ncols):
                colwidth = 0
                for row in range(nrows):
                    i = row + nrows*col
                    if i >= size:
                        break
                    x = list[i]
                    colwidth = max(colwidth, len(x))
                colwidths.append(colwidth)
                totwidth += colwidth + 2
                if totwidth > displaywidth:
                    break
            if totwidth <= displaywidth:
                break
        else:
            # Nothing fits: fall back to one item per row.
            nrows = len(list)
            ncols = 1
            colwidths = [0]
        for row in range(nrows):
            texts = []
            for col in range(ncols):
                i = row + nrows*col
                if i >= size:
                    x = ""
                else:
                    x = list[i]
                texts.append(x)
            # Drop trailing empty cells so rows don't end in padding.
            while texts and not texts[-1]:
                del texts[-1]
            for col in range(len(texts)):
                #texts[col] = texts[col].ljust(colwidths[col])
                texts[col] = '%-*s' % (colwidths[col], texts[col])
            self.stdout.write("%s\n"%str("  ".join(texts)))
|
@ -0,0 +1,16 @@
|
|||
# Should be reimplemented for MicroPython
|
||||
# Reason:
|
||||
# CPython implementation brings in metaclasses and other bloat.
|
||||
# This is going to be just import-all for other modules in a namespace package
|
||||
from ucollections import *
|
||||
try:
|
||||
from .defaultdict import defaultdict
|
||||
except ImportError:
|
||||
pass
|
||||
try:
|
||||
from .deque import deque
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
class MutableMapping:
    # Placeholder so `isinstance(x, MutableMapping)`-style code imports;
    # it provides no mapping behavior of its own.
    pass
|
@ -0,0 +1,36 @@
|
|||
class defaultdict:
    """Minimal mapping that fills in missing keys on demand.

    Looking up an absent key calls ``default_factory`` (when set) to create
    the value, stores it, and returns it — mirroring the core behavior of
    CPython's collections.defaultdict.
    """

    @staticmethod
    def __new__(cls, default_factory=None, **kwargs):
        # Some code (e.g. urllib.urlparse) expects that basic defaultdict
        # functionality will be available to subclasses without them
        # calling __init__().
        inst = super(defaultdict, cls).__new__(cls)
        inst.d = {}
        return inst

    def __init__(self, default_factory=None, **kwargs):
        # Keyword arguments become the initial contents.
        self.d = kwargs
        self.default_factory = default_factory

    def __getitem__(self, key):
        try:
            return self.d[key]
        except KeyError:
            # Materialize, cache, and return the default value.
            value = self.__missing__(key)
            self.d[key] = value
            return value

    def __setitem__(self, key, v):
        self.d[key] = v

    def __delitem__(self, key):
        del self.d[key]

    def __contains__(self, key):
        return key in self.d

    def __missing__(self, key):
        # Without a factory, behave like a plain dict: a missing key is an error.
        if self.default_factory is None:
            raise KeyError(key)
        return self.default_factory()
|
@ -0,0 +1,37 @@
|
|||
class deque:
    """List-backed double-ended queue (functional subset of collections.deque)."""

    def __init__(self, iterable=None):
        # Start empty unless given initial contents.
        self.q = [] if iterable is None else list(iterable)

    def popleft(self):
        """Remove and return the leftmost element."""
        return self.q.pop(0)

    def popright(self):
        """Remove and return the rightmost element (non-standard alias of pop)."""
        return self.q.pop()

    def pop(self):
        """Remove and return the rightmost element."""
        return self.q.pop()

    def append(self, a):
        """Add an element on the right."""
        self.q.append(a)

    def appendleft(self, a):
        """Add an element on the left."""
        self.q.insert(0, a)

    def extend(self, a):
        """Append every element of *a* on the right."""
        self.q.extend(a)

    def __len__(self):
        return len(self.q)

    def __bool__(self):
        return bool(self.q)

    def __iter__(self):
        return iter(self.q)

    def __str__(self):
        return 'deque({})'.format(self.q)
|
@ -0,0 +1,166 @@
|
|||
"""Utilities for with-statement contexts. See PEP 343.
|
||||
|
||||
Original source code: https://hg.python.org/cpython/file/3.4/Lib/contextlib.py
|
||||
|
||||
Not implemented:
|
||||
- redirect_stdout;
|
||||
|
||||
"""
|
||||
|
||||
import sys
|
||||
from collections import deque
|
||||
from ucontextlib import *
|
||||
|
||||
|
||||
class closing(object):
    """Context to automatically close something at the end of a block.

    ``with closing(<module>.open(<arguments>)) as f: <block>`` is
    equivalent to::

        f = <module>.open(<arguments>)
        try:
            <block>
        finally:
            f.close()
    """

    def __init__(self, thing):
        self.thing = thing

    def __enter__(self):
        # Hand the wrapped object to the `as` target unchanged.
        return self.thing

    def __exit__(self, *exc_info):
        # Always close, whether or not the block raised.
        self.thing.close()
||||
|
||||
|
||||
class suppress:
    """Context manager to suppress specified exceptions

    After the exception is suppressed, execution proceeds with the next
    statement following the with statement.

        with suppress(FileNotFoundError):
            os.remove(somefile)
        # Execution still resumes here if the file was already removed
    """

    def __init__(self, *exceptions):
        self._exceptions = exceptions

    def __enter__(self):
        # Nothing to bind; `with suppress(...) as x` yields None.
        pass

    def __exit__(self, exctype, excinst, exctb):
        # CPython exception handling only consults the concrete type
        # hierarchy (ignoring instance/subclass hooks — see
        # http://bugs.python.org/issue12029), so issubclass() gives the
        # simpler, more predictable semantics here.
        return exctype is not None and issubclass(exctype, self._exceptions)
||||
|
||||
# Inspired by discussions on http://bugs.python.org/issue13585
class ExitStack(object):
    """Context manager for dynamic management of a stack of exit callbacks

    For example:

        with ExitStack() as stack:
            files = [stack.enter_context(open(fname)) for fname in filenames]
            # All opened files will automatically be closed at the end of
            # the with statement, even if attempts to open files later
            # in the list raise an exception

    """
    def __init__(self):
        # LIFO stack of callbacks, each with an __exit__-style signature.
        self._exit_callbacks = deque()

    def pop_all(self):
        """Preserve the context stack by transferring it to a new instance"""
        new_stack = type(self)()
        new_stack._exit_callbacks = self._exit_callbacks
        self._exit_callbacks = deque()
        return new_stack

    def _push_cm_exit(self, cm, cm_exit):
        """Helper to correctly register callbacks to __exit__ methods"""
        def _exit_wrapper(*exc_details):
            return cm_exit(cm, *exc_details)
        self.push(_exit_wrapper)

    def push(self, exit):
        """Registers a callback with the standard __exit__ method signature

        Can suppress exceptions the same way __exit__ methods can.

        Also accepts any object with an __exit__ method (registering a call
        to the method instead of the object itself)
        """
        # We use an unbound method rather than a bound method to follow
        # the standard lookup behaviour for special methods
        _cb_type = type(exit)
        try:
            exit_method = _cb_type.__exit__
        except AttributeError:
            # Not a context manager, so assume its a callable
            self._exit_callbacks.append(exit)
        else:
            self._push_cm_exit(exit, exit_method)
        return exit # Allow use as a decorator

    def callback(self, callback, *args, **kwds):
        """Registers an arbitrary callback and arguments.

        Cannot suppress exceptions.
        """
        def _exit_wrapper(exc_type, exc, tb):
            callback(*args, **kwds)
        self.push(_exit_wrapper)
        return callback # Allow use as a decorator

    def enter_context(self, cm):
        """Enters the supplied context manager

        If successful, also pushes its __exit__ method as a callback and
        returns the result of the __enter__ method.
        """
        # We look up the special methods on the type to match the with statement
        _cm_type = type(cm)
        _exit = _cm_type.__exit__
        result = _cm_type.__enter__(cm)
        self._push_cm_exit(cm, _exit)
        return result

    def close(self):
        """Immediately unwind the context stack"""
        self.__exit__(None, None, None)

    def __enter__(self):
        return self

    def __exit__(self, *exc_details):
        received_exc = exc_details[0] is not None
        # Callbacks are invoked in LIFO order to match the behaviour of
        # nested context managers
        suppressed_exc = False
        pending_raise = False
        while self._exit_callbacks:
            cb = self._exit_callbacks.pop()
            try:
                if cb(*exc_details):
                    # Callback swallowed the active exception; clear it.
                    suppressed_exc = True
                    pending_raise = False
                    exc_details = (None, None, None)
            except:
                # A callback itself raised: remember the new exception and
                # keep unwinding the remaining callbacks with it.
                exc_details = sys.exc_info()
                pending_raise = True
        if pending_raise:
            raise exc_details[1]
        return received_exc and suppressed_exc
|
@ -0,0 +1,328 @@
|
|||
"""Generic (shallow and deep) copying operations.
|
||||
|
||||
Interface summary:
|
||||
|
||||
import copy
|
||||
|
||||
x = copy.copy(y) # make a shallow copy of y
|
||||
x = copy.deepcopy(y) # make a deep copy of y
|
||||
|
||||
For module specific errors, copy.Error is raised.
|
||||
|
||||
The difference between shallow and deep copying is only relevant for
|
||||
compound objects (objects that contain other objects, like lists or
|
||||
class instances).
|
||||
|
||||
- A shallow copy constructs a new compound object and then (to the
|
||||
extent possible) inserts *the same objects* into it that the
|
||||
original contains.
|
||||
|
||||
- A deep copy constructs a new compound object and then, recursively,
|
||||
inserts *copies* into it of the objects found in the original.
|
||||
|
||||
Two problems often exist with deep copy operations that don't exist
|
||||
with shallow copy operations:
|
||||
|
||||
a) recursive objects (compound objects that, directly or indirectly,
|
||||
contain a reference to themselves) may cause a recursive loop
|
||||
|
||||
b) because deep copy copies *everything* it may copy too much, e.g.
|
||||
administrative data structures that should be shared even between
|
||||
copies
|
||||
|
||||
Python's deep copy operation avoids these problems by:
|
||||
|
||||
a) keeping a table of objects already copied during the current
|
||||
copying pass
|
||||
|
||||
b) letting user-defined classes override the copying operation or the
|
||||
set of components copied
|
||||
|
||||
This version does not copy types like module, class, function, method,
|
||||
nor stack trace, stack frame, nor file, socket, window, nor array, nor
|
||||
any similar types.
|
||||
|
||||
Classes can use the same interfaces to control copying that they use
|
||||
to control pickling: they can define methods called __getinitargs__(),
|
||||
__getstate__() and __setstate__(). See the documentation for module
|
||||
"pickle" for information on these methods.
|
||||
"""
|
||||
|
||||
import types
|
||||
#import weakref
|
||||
#from copyreg import dispatch_table
|
||||
#import builtins
|
||||
|
||||
class Error(Exception):
    # Raised for module-specific copy failures (uncopyable objects).
    pass
error = Error   # backward compatibility
|
||||
|
||||
# Jython exposes instance __dict__s as PyStringMap; absent elsewhere.
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None

__all__ = ["Error", "copy", "deepcopy"]
|
||||
|
||||
def copy(x):
    """Shallow copy operation on arbitrary Python objects.

    See the module's __doc__ string for more info.

    Raises Error for objects with neither a registered copier nor a
    __copy__ hook.
    """
    cls = type(x)

    # Fast path: types with a registered shallow-copy helper.
    copier = _copy_dispatch.get(cls)
    if copier:
        return copier(x)

    # Fall back to a class-provided __copy__ hook.
    copier = getattr(cls, "__copy__", None)
    if copier:
        return copier(x)

    # This trimmed-down port has no copyreg/__reduce_ex__ fallback:
    # anything not handled above is reported as uncopyable.  (The
    # unreachable reduce-protocol code that used to follow this raise
    # was dead and has been removed.)
    raise Error("un(shallow)copyable object of type %s" % cls)
|
||||
|
||||
|
||||
# Registry mapping type -> shallow-copy helper.  Built under the short
# alias `d`, which is deleted once the table is populated.
_copy_dispatch = d = {}

def _copy_immutable(x):
    # Immutable objects are their own shallow copy.
    return x
for t in (type(None), int, float, bool, str, tuple,
          type, range,
          types.BuiltinFunctionType, type(Ellipsis),
          types.FunctionType):
    d[t] = _copy_immutable
t = getattr(types, "CodeType", None)  # may be absent on this port
if t is not None:
    d[t] = _copy_immutable
#for name in ("complex", "unicode"):
#    t = getattr(builtins, name, None)
#    if t is not None:
#        d[t] = _copy_immutable

def _copy_with_constructor(x):
    # Containers whose constructor accepts an instance of the same type.
    return type(x)(x)
for t in (list, dict, set):
    d[t] = _copy_with_constructor

def _copy_with_copy_method(x):
    # Types exposing their own .copy() method (e.g. Jython's PyStringMap).
    return x.copy()
if PyStringMap is not None:
    d[PyStringMap] = _copy_with_copy_method

del d
|
||||
|
||||
def deepcopy(x, memo=None, _nil=[]):
    """Deep copy operation on arbitrary Python objects.

    See the module's __doc__ string for more info.
    """

    if memo is None:
        memo = {}

    # Objects already copied during this pass are returned from the
    # memo; this is what makes recursive structures terminate.
    d = id(x)
    y = memo.get(d, _nil)
    if y is not _nil:
        return y

    cls = type(x)

    copier = _deepcopy_dispatch.get(cls)
    if copier:
        y = copier(x, memo)
    else:
        try:
            issc = issubclass(cls, type)
        except TypeError: # cls is not a class (old Boost; see SF #502085)
            issc = 0
        if issc:
            # Classes themselves are treated as atomic, not copied.
            y = _deepcopy_atomic(x, memo)
        else:
            copier = getattr(x, "__deepcopy__", None)
            if copier:
                y = copier(memo)
            else:
                # NOTE(review): `dispatch_table` does not appear to be
                # defined at module level in this port (copyreg is not
                # imported), so this fallback path looks like it would
                # raise NameError -- confirm before relying on
                # __reduce__-based deep copying.
                reductor = dispatch_table.get(cls)
                if reductor:
                    rv = reductor(x)
                else:
                    reductor = getattr(x, "__reduce_ex__", None)
                    if reductor:
                        rv = reductor(2)
                    else:
                        reductor = getattr(x, "__reduce__", None)
                        if reductor:
                            rv = reductor()
                        else:
                            raise Error(
                                "un(deep)copyable object of type %s" % cls)
                y = _reconstruct(x, rv, 1, memo)

    # If it is its own copy, don't memoize.
    if y is not x:
        memo[d] = y
        _keep_alive(x, memo) # Make sure x lives at least as long as d
    return y
|
||||
|
||||
# Registry mapping type -> deep-copy helper (short alias `d`, deleted
# at the end of the module).
_deepcopy_dispatch = d = {}

def _deepcopy_atomic(x, memo):
    # Atomic/immutable values are returned unchanged.
    return x
d[type(None)] = _deepcopy_atomic
d[type(Ellipsis)] = _deepcopy_atomic
d[int] = _deepcopy_atomic
d[float] = _deepcopy_atomic
d[bool] = _deepcopy_atomic
try:
    d[complex] = _deepcopy_atomic
except NameError:
    # complex may be compiled out on space-constrained builds.
    pass
d[bytes] = _deepcopy_atomic
d[str] = _deepcopy_atomic
try:
    d[types.CodeType] = _deepcopy_atomic
except AttributeError:
    # CodeType may be absent on this port.
    pass
d[type] = _deepcopy_atomic
d[range] = _deepcopy_atomic
d[types.BuiltinFunctionType] = _deepcopy_atomic
d[types.FunctionType] = _deepcopy_atomic
#d[weakref.ref] = _deepcopy_atomic
|
||||
|
||||
def _deepcopy_list(x, memo):
    """Deep-copy a list.

    The new (still empty) list is registered in *memo* before any
    element is copied, so self-referential lists terminate.
    """
    result = []
    memo[id(x)] = result
    result.extend(deepcopy(item, memo) for item in x)
    return result
d[list] = _deepcopy_list
|
||||
|
||||
def _deepcopy_tuple(x, memo):
    # Copy the elements first; a tuple cannot be placed in the memo
    # before its contents exist.
    y = []
    for a in x:
        y.append(deepcopy(a, memo))
    # We're not going to put the tuple in the memo, but it's still important we
    # check for it, in case the tuple contains recursive mutable structures.
    try:
        return memo[id(x)]
    except KeyError:
        pass
    for i in range(len(x)):
        if x[i] is not y[i]:
            # At least one element was actually copied: build a new tuple.
            y = tuple(y)
            break
    else:
        # Every element copied to itself: reuse the original tuple.
        y = x
    return y
d[tuple] = _deepcopy_tuple
|
||||
|
||||
def _deepcopy_dict(x, memo):
    """Deep-copy a dict.

    The empty copy goes into *memo* before any keys/values are copied
    so reference cycles back to the dict resolve correctly.
    """
    result = {}
    memo[id(x)] = result
    for k, v in x.items():
        result[deepcopy(k, memo)] = deepcopy(v, memo)
    return result
d[dict] = _deepcopy_dict
if PyStringMap is not None:
    d[PyStringMap] = _deepcopy_dict
|
||||
|
||||
def _deepcopy_method(x, memo): # Copy instance methods
    # Rebind the original function object to a deep copy of the bound
    # instance; the function itself is shared, matching shallow semantics
    # for code objects.
    return type(x)(x.__func__, deepcopy(x.__self__, memo))
_deepcopy_dispatch[types.MethodType] = _deepcopy_method
|
||||
|
||||
def _keep_alive(x, memo):
|
||||
"""Keeps a reference to the object x in the memo.
|
||||
|
||||
Because we remember objects by their id, we have
|
||||
to assure that possibly temporary objects are kept
|
||||
alive by referencing them.
|
||||
We store a reference at the id of the memo, which should
|
||||
normally not be used unless someone tries to deepcopy
|
||||
the memo itself...
|
||||
"""
|
||||
try:
|
||||
memo[id(memo)].append(x)
|
||||
except KeyError:
|
||||
# aha, this is the first one :-)
|
||||
memo[id(memo)]=[x]
|
||||
|
||||
def _reconstruct(x, info, deep, memo=None):
    """Rebuild a copy of *x* from __reduce__-style *info*.

    *deep* selects deep (vs shallow) copying of the reconstructed
    components.
    """
    if isinstance(info, str):
        # A string reduce value means "treat as atomic".
        return x
    assert isinstance(info, tuple)
    if memo is None:
        memo = {}
    n = len(info)
    assert n in (2, 3, 4, 5)
    # info is (callable, args[, state[, listiter[, dictiter]]]).
    callable, args = info[:2]
    if n > 2:
        state = info[2]
    else:
        state = {}
    if n > 3:
        listiter = info[3]
    else:
        listiter = None
    if n > 4:
        dictiter = info[4]
    else:
        dictiter = None
    if deep:
        args = deepcopy(args, memo)
    y = callable(*args)
    # Register the new object before restoring state so cycles resolve.
    memo[id(x)] = y

    if state:
        if deep:
            state = deepcopy(state, memo)
        if hasattr(y, '__setstate__'):
            y.__setstate__(state)
        else:
            # state may be (dict_state, slot_state) for slotted classes.
            if isinstance(state, tuple) and len(state) == 2:
                state, slotstate = state
            else:
                slotstate = None
            if state is not None:
                y.__dict__.update(state)
            if slotstate is not None:
                for key, value in slotstate.items():
                    setattr(y, key, value)

    # Replay appended items and mapping entries, copying them if deep.
    if listiter is not None:
        for item in listiter:
            if deep:
                item = deepcopy(item, memo)
            y.append(item)
    if dictiter is not None:
        for key, value in dictiter:
            if deep:
                key = deepcopy(key, memo)
                value = deepcopy(value, memo)
            y[key] = value
    return y
|
||||
|
||||
# The dispatch-table aliases were only needed while building the tables.
del d

del types

# Helper for instance creation without calling __init__
class _EmptyClass:
    pass
|
|
@ -0,0 +1,99 @@
|
|||
"""Constants and membership tests for ASCII characters"""
|
||||
|
||||
NUL = 0x00 # ^@
|
||||
SOH = 0x01 # ^A
|
||||
STX = 0x02 # ^B
|
||||
ETX = 0x03 # ^C
|
||||
EOT = 0x04 # ^D
|
||||
ENQ = 0x05 # ^E
|
||||
ACK = 0x06 # ^F
|
||||
BEL = 0x07 # ^G
|
||||
BS = 0x08 # ^H
|
||||
TAB = 0x09 # ^I
|
||||
HT = 0x09 # ^I
|
||||
LF = 0x0a # ^J
|
||||
NL = 0x0a # ^J
|
||||
VT = 0x0b # ^K
|
||||
FF = 0x0c # ^L
|
||||
CR = 0x0d # ^M
|
||||
SO = 0x0e # ^N
|
||||
SI = 0x0f # ^O
|
||||
DLE = 0x10 # ^P
|
||||
DC1 = 0x11 # ^Q
|
||||
DC2 = 0x12 # ^R
|
||||
DC3 = 0x13 # ^S
|
||||
DC4 = 0x14 # ^T
|
||||
NAK = 0x15 # ^U
|
||||
SYN = 0x16 # ^V
|
||||
ETB = 0x17 # ^W
|
||||
CAN = 0x18 # ^X
|
||||
EM = 0x19 # ^Y
|
||||
SUB = 0x1a # ^Z
|
||||
ESC = 0x1b # ^[
|
||||
FS = 0x1c # ^\
|
||||
GS = 0x1d # ^]
|
||||
RS = 0x1e # ^^
|
||||
US = 0x1f # ^_
|
||||
SP = 0x20 # space
|
||||
DEL = 0x7f # delete
|
||||
|
||||
controlnames = [
|
||||
"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
|
||||
"BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
|
||||
"DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
|
||||
"CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",
|
||||
"SP"
|
||||
]
|
||||
|
||||
def _ctoi(c):
|
||||
if type(c) == type(""):
|
||||
return ord(c)
|
||||
else:
|
||||
return c
|
||||
|
||||
def isalnum(c): return isalpha(c) or isdigit(c)
|
||||
def isalpha(c): return isupper(c) or islower(c)
|
||||
def isascii(c): return _ctoi(c) <= 127 # ?
|
||||
def isblank(c): return _ctoi(c) in (8,32)
|
||||
def iscntrl(c): return _ctoi(c) <= 31
|
||||
def isdigit(c): return _ctoi(c) >= 48 and _ctoi(c) <= 57
|
||||
def isgraph(c): return _ctoi(c) >= 33 and _ctoi(c) <= 126
|
||||
def islower(c): return _ctoi(c) >= 97 and _ctoi(c) <= 122
|
||||
def isprint(c): return _ctoi(c) >= 32 and _ctoi(c) <= 126
|
||||
def ispunct(c): return _ctoi(c) != 32 and not isalnum(c)
|
||||
def isspace(c): return _ctoi(c) in (9, 10, 11, 12, 13, 32)
|
||||
def isupper(c): return _ctoi(c) >= 65 and _ctoi(c) <= 90
|
||||
def isxdigit(c): return isdigit(c) or \
|
||||
(_ctoi(c) >= 65 and _ctoi(c) <= 70) or (_ctoi(c) >= 97 and _ctoi(c) <= 102)
|
||||
def isctrl(c): return _ctoi(c) < 32
|
||||
def ismeta(c): return _ctoi(c) > 127
|
||||
|
||||
def ascii(c):
    """Strip to 7 bits, preserving the str/int type of the input."""
    masked = _ctoi(c) & 0x7f
    return chr(masked) if type(c) == type("") else masked

def ctrl(c):
    """Map to the corresponding control character (mask to 5 bits)."""
    masked = _ctoi(c) & 0x1f
    return chr(masked) if type(c) == type("") else masked

def alt(c):
    """Set the meta bit (0x80), preserving the input type."""
    combined = _ctoi(c) | 0x80
    return chr(combined) if type(c) == type("") else combined
|
||||
|
||||
def unctrl(c):
    """Return a printable caret-notation representation of *c*."""
    bits = _ctoi(c)
    low = bits & 0x7f
    if bits == 0x7f:
        rep = "^?"
    elif isprint(low):
        rep = chr(low)
    else:
        # Shift a control character up into the printable range: ^A, ^B...
        rep = "^" + chr((low | 0x20) + 0x20)
    # A set meta bit is rendered with a leading '!'.
    return "!" + rep if bits & 0x80 else rep
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,221 @@
|
|||
""" Routines for manipulating RFC2047 encoded words.
|
||||
|
||||
This is currently a package-private API, but will be considered for promotion
|
||||
to a public API if there is demand.
|
||||
|
||||
"""
|
||||
|
||||
# An encoded word looks like this:
|
||||
#
|
||||
# =?charset[*lang]?cte?encoded_string?=
|
||||
#
|
||||
# for more information about charset see the charset module. Here it is one
|
||||
# of the preferred MIME charset names (hopefully; you never know when parsing).
|
||||
# cte (Content Transfer Encoding) is either 'q' or 'b' (ignoring case). In
|
||||
# theory other letters could be used for other encodings, but in practice this
|
||||
# (almost?) never happens. There could be a public API for adding entries
|
||||
# to the CTE tables, but YAGNI for now. 'q' is Quoted Printable, 'b' is
|
||||
# Base64. The meaning of encoded_string should be obvious. 'lang' is optional
|
||||
# as indicated by the brackets (they are not part of the syntax) but is almost
|
||||
# never encountered in practice.
|
||||
#
|
||||
# The general interface for a CTE decoder is that it takes the encoded_string
|
||||
# as its argument, and returns a tuple (cte_decoded_string, defects). The
|
||||
# cte_decoded_string is the original binary that was encoded using the
|
||||
# specified cte. 'defects' is a list of MessageDefect instances indicating any
|
||||
# problems encountered during conversion. 'charset' and 'lang' are the
|
||||
# corresponding strings extracted from the EW, case preserved.
|
||||
#
|
||||
# The general interface for a CTE encoder is that it takes a binary sequence
|
||||
# as input and returns the cte_encoded_string, which is an ascii-only string.
|
||||
#
|
||||
# Each decoder must also supply a length function that takes the binary
|
||||
# sequence as its argument and returns the length of the resulting encoded
|
||||
# string.
|
||||
#
|
||||
# The main API functions for the module are decode, which calls the decoder
|
||||
# referenced by the cte specifier, and encode, which adds the appropriate
|
||||
# RFC 2047 "chrome" to the encoded string, and can optionally automatically
|
||||
# select the shortest possible encoding. See their docstrings below for
|
||||
# details.
|
||||
|
||||
import re
|
||||
import base64
|
||||
import binascii
|
||||
import functools
|
||||
from string import ascii_letters, digits
|
||||
from email import errors
|
||||
|
||||
# Public API: the Q ('q') and B ('b') CTE codecs plus the top-level
# decode/encode entry points.
__all__ = ['decode_q',
           'encode_q',
           'decode_b',
           'encode_b',
           'len_q',
           'len_b',
           'decode',
           'encode',
           ]
|
||||
|
||||
#
|
||||
# Quoted Printable
|
||||
#
|
||||
|
||||
# regex based decoder.
|
||||
# regex based decoder: every '=XX' hex escape collapses to its byte value.
_q_byte_subber = functools.partial(
    re.compile(br'=([a-fA-F0-9]{2})').sub,
    lambda m: bytes([int(m.group(1), 16)]))

def decode_q(encoded):
    """Decode RFC 2047 Q-encoded bytes; return (decoded_bytes, defects).

    '_' encodes a space in Q encoding; this decoder never reports
    defects, so the list is always empty.
    """
    return _q_byte_subber(encoded.replace(b'_', b' ')), []
|
||||
|
||||
|
||||
# dict mapping bytes to their encoded form
|
||||
class _QByteMap(dict):
|
||||
|
||||
safe = b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii')
|
||||
|
||||
def __missing__(self, key):
|
||||
if key in self.safe:
|
||||
self[key] = chr(key)
|
||||
else:
|
||||
self[key] = "={:02X}".format(key)
|
||||
return self[key]
|
||||
|
||||
_q_byte_map = _QByteMap()
|
||||
|
||||
# In headers spaces are mapped to '_'.
|
||||
_q_byte_map[ord(' ')] = '_'
|
||||
|
||||
def encode_q(bstring):
|
||||
return ''.join(_q_byte_map[x] for x in bstring)
|
||||
|
||||
def len_q(bstring):
|
||||
return sum(len(_q_byte_map[x]) for x in bstring)
|
||||
|
||||
|
||||
#
|
||||
# Base64
|
||||
#
|
||||
|
||||
def decode_b(encoded):
    # Decode base64-encoded bytes, tolerating both bad padding and
    # non-alphabet characters; returns (decoded_bytes, defect_list).
    defects = []
    pad_err = len(encoded) % 4
    if pad_err:
        defects.append(errors.InvalidBase64PaddingDefect())
        padded_encoded = encoded + b'==='[:4-pad_err]
    else:
        padded_encoded = encoded
    try:
        return base64.b64decode(padded_encoded, validate=True), defects
    except binascii.Error:
        # Since we had correct padding, this must be an invalid char error.
        defects = [errors.InvalidBase64CharactersDefect()]
        # The non-alphabet characters are ignored as far as padding
        # goes, but we don't know how many there are.  So we'll just
        # try various padding lengths until something works.
        for i in 0, 1, 2, 3:
            try:
                return base64.b64decode(encoded+b'='*i, validate=False), defects
            except binascii.Error:
                if i==0:
                    defects.append(errors.InvalidBase64PaddingDefect())
        else:
            # This should never happen.
            raise AssertionError("unexpected binascii.Error")
|
||||
|
||||
def encode_b(bstring):
    """Base64-encode *bstring* and return the result as an ASCII str."""
    return str(base64.b64encode(bstring), 'ascii')
|
||||
|
||||
def len_b(bstring):
    """Length of the base64 encoding of *bstring*.

    Base64 emits 4 output characters for every (possibly partial)
    3-byte input group, i.e. 4 * ceil(len/3).
    """
    return 4 * ((len(bstring) + 2) // 3)
|
||||
|
||||
|
||||
# CTE specifier -> decoder function used by decode().
_cte_decoders = {
    'q': decode_q,
    'b': decode_b,
    }
|
||||
|
||||
def decode(ew):
    """Decode encoded word and return (string, charset, lang, defects) tuple.

    An RFC 2047/2243 encoded word has the form:

        =?charset*lang?cte?encoded_string?=

    where '*lang' may be omitted but the other parts may not be.

    This function expects exactly such a string (that is, it does not check the
    syntax and may raise errors if the string is not well formed), and returns
    the encoded_string decoded first from its Content Transfer Encoding and
    then from the resulting bytes into unicode using the specified charset.  If
    the cte-decoded string does not successfully decode using the specified
    character set, a defect is added to the defects list and the unknown octets
    are replaced by the unicode 'unknown' character \uFDFF.

    The specified charset and language are returned.  The default for language,
    which is rarely if ever encountered, is the empty string.

    """
    _, charset, cte, cte_string, _ = ew.split('?')
    charset, _, lang = charset.partition('*')
    cte = cte.lower()
    # Recover the original bytes and do CTE decoding.
    bstring = cte_string.encode('ascii', 'surrogateescape')
    bstring, defects = _cte_decoders[cte](bstring)
    # Turn the CTE decoded bytes into unicode.
    try:
        string = bstring.decode(charset)
    except UnicodeError:
        defects.append(errors.UndecodableBytesDefect("Encoded word "
            "contains bytes not decodable using {} charset".format(charset)))
        string = bstring.decode(charset, 'surrogateescape')
    except LookupError:
        # Unknown charset: keep the raw bytes via surrogateescape.
        string = bstring.decode('ascii', 'surrogateescape')
        if charset.lower() != 'unknown-8bit':
            defects.append(errors.CharsetError("Unknown charset {} "
                "in encoded word; decoded as unknown bytes".format(charset)))
    return string, charset, lang, defects
|
||||
|
||||
|
||||
# CTE specifier -> encoder, and -> encoded-length helper, used by encode().
_cte_encoders = {
    'q': encode_q,
    'b': encode_b,
    }

_cte_encode_length = {
    'q': len_q,
    'b': len_b,
    }
|
||||
|
||||
def encode(string, charset='utf-8', encoding=None, lang=''):
    """Encode string using the CTE encoding that produces the shorter result.

    Produces an RFC 2047/2243 encoded word of the form

        =?charset*lang?cte?encoded_string?=

    where '*lang' is omitted unless the 'lang' parameter is given a value.
    Optional argument charset (defaults to utf-8) specifies the charset to use
    to encode the string to binary before CTE encoding it.  Optional argument
    'encoding' is the cte specifier for the encoding that should be used ('q'
    or 'b'); if it is None (the default) the encoding which produces the
    shortest encoded sequence is used, except that 'q' is preferred if it is up
    to five characters longer.  Optional argument 'lang' (default '') gives the
    RFC 2243 language string to specify in the encoded word.
    """
    if charset == 'unknown-8bit':
        bstring = string.encode('ascii', 'surrogateescape')
    else:
        bstring = string.encode(charset)
    if encoding is None:
        # Prefer 'q' unless 'b' would win by at least 5 characters.
        qlen = _cte_encode_length['q'](bstring)
        blen = _cte_encode_length['b'](bstring)
        encoding = 'b' if qlen - blen >= 5 else 'q'
    encoded = _cte_encoders[encoding](bstring)
    if lang:
        lang = '*' + lang
    return "=?{}{}?{}?{}?=".format(charset, lang, encoding, encoded)
|
|
@ -0,0 +1,540 @@
|
|||
# Copyright (C) 2002-2007 Python Software Foundation
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Email address parsing code.
|
||||
|
||||
Lifted directly from rfc822.py. This should eventually be rewritten.
|
||||
"""
|
||||
|
||||
__all__ = [
    'mktime_tz',
    'parsedate',
    'parsedate_tz',
    'quote',
    ]

import time, calendar

# Join/separator constants used throughout the parser.
SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '

# Parse a date field
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']

_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }
|
||||
|
||||
|
||||
def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Accounts for military timezones.
    """
    fields = _parsedate_tz(data)
    if not fields:
        return
    if fields[9] is None:
        # Unknown/declaimed offset is reported as 0 by this wrapper.
        fields[9] = 0
    return tuple(fields)
|
||||
|
||||
def _parsedate_tz(data):
    """Convert date to extended time tuple.

    The last (additional) element is the time zone offset in seconds, except if
    the timezone was specified as -0000.  In that case the last element is
    None.  This indicates a UTC timestamp that explicitly declaims knowledge of
    the source timezone, as opposed to a +0000 timestamp that indicates the
    source timezone really was UTC.

    """
    if not data:
        return
    data = data.split()
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            # Split a glued "HH:MM:SS+ZZZZ" field into time and zone.
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Perhaps day and month are swapped ("Jan 15" vs "15 Jan").
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        # Full month names occupy indexes 13-24; fold back to 1-12.
        mm -= 12
    if dd[-1] == ',':
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time fields are swapped; swap them back.
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
    if not yy[0].isdigit():
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    elif len(tm) == 1 and '.' in tm[0]:
        # Some non-compliant MUAs use '.' to separate time elements.
        tm = tm[0].split('.')
        if len(tm) == 2:
            [thh, tmm] = tm
            tss = 0
        elif len(tm) == 3:
            [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    # Check for a yy specified in two-digit format, then convert it to the
    # appropriate four-digit format, according to the POSIX standard. RFC 822
    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
    # mandates a 4-digit yy. For more information, see the documentation for
    # the time module.
    if yy < 100:
        # The year is between 1969 and 1999 (inclusive).
        if yy > 68:
            yy += 1900
        # The year is between 2000 and 2068 (inclusive).
        else:
            yy += 2000
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
        # '-0000' declaims knowledge of the source timezone (see docstring).
        if tzoffset==0 and tz.startswith('-'):
            tzoffset = None
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    # Daylight Saving Time flag is set to -1, since DST is unknown.
    return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
|
||||
|
||||
|
||||
def parsedate(data):
    """Convert a time string to a time tuple."""
    # Drop the trailing tz-offset element on success; pass failures
    # (None) through unchanged.
    t = parsedate_tz(data)
    return t[:9] if isinstance(t, tuple) else t
|
||||
|
||||
|
||||
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp."""
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    # Interpret as UTC, then shift by the recorded offset.
    return calendar.timegm(data) - data[9]
|
||||
|
||||
|
||||
def quote(str):
    """Prepare string to be used in a quoted string.

    Turns backslash and double quote characters into quoted pairs.  These
    are the only characters that need to be quoted inside a quoted string.
    Does not add the surrounding double quotes.
    """
    # Escape backslashes first so the escapes we add are not re-escaped.
    escaped = str.replace('\\', '\\\\')
    return escaped.replace('"', '\\"')
|
||||
|
||||
|
||||
class AddrlistClass:
|
||||
"""Address parser class by Ben Escoto.
|
||||
|
||||
To understand what this class does, it helps to have a copy of RFC 2822 in
|
||||
front of you.
|
||||
|
||||
Note: this class interface is deprecated and may be removed in the future.
|
||||
Use email.utils.AddressList instead.
|
||||
"""
|
||||
|
||||
    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'   # characters that end an atom
        self.pos = 0                      # current parse position in field
        self.LWS = ' \t'                  # linear whitespace
        self.CR = '\r\n'
        self.FWS = self.LWS + self.CR     # folding whitespace
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []             # comments seen while parsing
|
||||
|
||||
def gotonext(self):
|
||||
"""Skip white space and extract comments."""
|
||||
wslist = []
|
||||
while self.pos < len(self.field):
|
||||
if self.field[self.pos] in self.LWS + '\n\r':
|
||||
if self.field[self.pos] not in '\n\r':
|
||||
wslist.append(self.field[self.pos])
|
||||
self.pos += 1
|
||||
elif self.field[self.pos] == '(':
|
||||
self.commentlist.append(self.getcomment())
|
||||
else:
|
||||
break
|
||||
return EMPTYSTRING.join(wslist)
|
||||
|
||||
def getaddrlist(self):
|
||||
"""Parse all addresses.
|
||||
|
||||
Returns a list containing all of the addresses.
|
||||
"""
|
||||
result = []
|
||||
while self.pos < len(self.field):
|
||||
ad = self.getaddress()
|
||||
if ad:
|
||||
result += ad
|
||||
else:
|
||||
result.append(('', ''))
|
||||
return result
|
||||
|
||||
    def getaddress(self):
        """Parse the next address."""
        self.commentlist = []
        self.gotonext()

        # Remember where we are so the addrspec-only case can rewind.
        oldpos = self.pos
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(SPACE.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                # Recursively collect each member address of the group.
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(SPACE.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(SPACE.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Skip a stray special character so parsing can continue.
                self.pos += 1

        self.gotonext()
        # Consume a trailing comma separating this address from the next.
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist
|
||||
|
||||
    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # After '@' in a route: consume (and discard) a domain.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                # ':' ends the route part; the addrspec follows.
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                self.pos += 1
                break
            self.gotonext()

        return adlist
|
||||
|
||||
    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            preserve_ws = True
            if self.field[self.pos] == '.':
                # Drop whitespace collected just before a dot separator.
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                aslist.append('.')
                self.pos += 1
                preserve_ws = False
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % quote(self.getquote()))
            elif self.field[self.pos] in self.atomends:
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                break
            else:
                aslist.append(self.getatom())
            ws = self.gotonext()
            if preserve_ws and ws:
                aslist.append(ws)

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            # No domain part: return just the local part.
            return EMPTYSTRING.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        return EMPTYSTRING.join(aslist) + self.getdomain()
|
||||
|
||||
def getdomain(self):
    """Get the complete domain name from an address."""
    # Collected pieces: atoms, '[...]' domain literals and '.' separators.
    pieces = []
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch in self.LWS:
            # Skip linear whitespace between tokens.
            self.pos += 1
        elif ch == '(':
            # Comments are recorded separately, not kept in the domain.
            self.commentlist.append(self.getcomment())
        elif ch == '[':
            pieces.append(self.getdomainliteral())
        elif ch == '.':
            self.pos += 1
            pieces.append('.')
        elif ch in self.atomends:
            break
        else:
            pieces.append(self.getatom())
    return EMPTYSTRING.join(pieces)
|
||||
|
||||
def getdelimited(self, beginchar, endchars, allowcomments=True):
    """Parse a header fragment delimited by special characters.

    `beginchar' is the start character for the fragment.
    If self is not looking at an instance of `beginchar' then
    getdelimited returns the empty string.

    `endchars' is a sequence of allowable end-delimiting characters.
    Parsing stops when one of these is encountered.

    If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
    within the parsed fragment.
    """
    if self.field[self.pos] != beginchar:
        return ''

    chunks = ['']
    escaped = False  # previous character was a backslash
    self.pos += 1
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if escaped:
            # A backslash-escaped character is taken literally.
            chunks.append(ch)
            escaped = False
        elif ch in endchars:
            self.pos += 1
            break
        elif allowcomments and ch == '(':
            chunks.append(self.getcomment())
            continue  # getcomment already advanced self.pos
        elif ch == '\\':
            escaped = True
        else:
            chunks.append(ch)
        self.pos += 1

    return EMPTYSTRING.join(chunks)
|
||||
|
||||
def getquote(self):
    """Get a quote-delimited fragment from self's field."""
    # A quoted-string runs to the closing '"' (or a stray CR); embedded
    # comments are not recognized inside it.
    fragment = self.getdelimited('"', '"\r', False)
    return fragment
|
||||
|
||||
def getcomment(self):
    """Get a parenthesis-delimited fragment from self's field."""
    # allowcomments=True lets nested '(' be consumed recursively by
    # getdelimited.
    fragment = self.getdelimited('(', ')\r', True)
    return fragment
|
||||
|
||||
def getdomainliteral(self):
    """Parse an RFC 2822 domain-literal."""
    # The delimiters are consumed by getdelimited; re-wrap the body in
    # brackets for the canonical form.
    inner = self.getdelimited('[', ']\r', False)
    return '[%s]' % inner
|
||||
|
||||
def getatom(self, atomends=None):
    """Parse an RFC 2822 atom.

    Optional atomends specifies a different set of end token delimiters
    (the default is to use self.atomends).  This is used e.g. in
    getphraselist() since phrase endings must not include the `.' (which
    is legal in phrases)."""
    if atomends is None:
        atomends = self.atomends
    chars = ['']
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch in atomends:
            break
        chars.append(ch)
        self.pos += 1
    return EMPTYSTRING.join(chars)
|
||||
|
||||
def getphraselist(self):
    """Parse a sequence of RFC 2822 phrases.

    A phrase is a sequence of words, which are in turn either RFC 2822
    atoms or quoted-strings.  Phrases are canonicalized by squeezing all
    runs of continuous whitespace into one space.
    """
    words = []
    while self.pos < len(self.field):
        ch = self.field[self.pos]
        if ch in self.FWS:
            # Folding whitespace just separates words; skip it.
            self.pos += 1
        elif ch == '"':
            words.append(self.getquote())
        elif ch == '(':
            # Comments are collected on the side, not kept in the phrase.
            self.commentlist.append(self.getcomment())
        elif ch in self.phraseends:
            break
        else:
            # Atoms inside a phrase may legally contain '.', hence the
            # phrase-specific delimiter set.
            words.append(self.getatom(self.phraseends))
    return words
|
||||
|
||||
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses."""

    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        # A false-y field yields an empty address list without parsing.
        self.addresslist = self.getaddrlist() if field else []

    def __len__(self):
        return len(self.addresslist)

    def __add__(self, other):
        # Set union: left operand's entries, then unseen right entries.
        combined = AddressList(None)
        combined.addresslist = self.addresslist[:]
        combined.addresslist.extend(
            addr for addr in other.addresslist
            if addr not in self.addresslist)
        return combined

    def __iadd__(self, other):
        # Set union, in-place
        for addr in other.addresslist:
            if addr not in self.addresslist:
                self.addresslist.append(addr)
        return self

    def __sub__(self, other):
        # Set difference
        difference = AddressList(None)
        difference.addresslist = [
            addr for addr in self.addresslist
            if addr not in other.addresslist]
        return difference

    def __isub__(self, other):
        # Set difference, in-place
        for addr in other.addresslist:
            if addr in self.addresslist:
                self.addresslist.remove(addr)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
|
|
@ -0,0 +1,359 @@
|
|||
"""Policy framework for the email package.
|
||||
|
||||
Allows fine grained feature control of how the package parses and emits data.
|
||||
"""
|
||||
|
||||
import abc
|
||||
from email import header
|
||||
from email import charset as _charset
|
||||
from email.utils import _has_surrogates
|
||||
|
||||
__all__ = [
|
||||
'Policy',
|
||||
'Compat32',
|
||||
'compat32',
|
||||
]
|
||||
|
||||
|
||||
class _PolicyBase:

    """Policy Object basic framework.

    This class is useless unless subclassed.  A subclass should define
    class attributes with defaults for any values that are to be managed
    by the Policy object.  The constructor then accepts non-default
    values for those attributes as keyword arguments.  Instances are
    immutable; calling clone() (or adding two instances with '+')
    produces a new instance with selected values replaced:

        A + B == A(<non-default values of B>)

    The repr of an instance can be used to reconstruct the object if and
    only if the repr of the values can be used to reconstruct those
    values.

    """

    def __init__(self, **kw):
        """Create new Policy, possibly overriding some defaults.

        See class docstring for a list of overridable attributes.

        """
        for name, value in kw.items():
            if not hasattr(self, name):
                raise TypeError(
                    "{!r} is an invalid keyword argument for {}".format(
                        name, self.__class__.__name__))
            # Go around our own __setattr__, which forbids mutation.
            super(_PolicyBase, self).__setattr__(name, value)

    def __repr__(self):
        settings = ["{}={!r}".format(name, value)
                    for name, value in self.__dict__.items()]
        return "{}({})".format(self.__class__.__name__, ', '.join(settings))

    def clone(self, **kw):
        """Return a new instance with specified attributes changed.

        The new instance has the same attribute values as the current
        object, except for the changes passed in as keyword arguments.

        """
        fresh = self.__class__.__new__(self.__class__)
        for attr, value in self.__dict__.items():
            object.__setattr__(fresh, attr, value)
        for attr, value in kw.items():
            if not hasattr(self, attr):
                raise TypeError(
                    "{!r} is an invalid keyword argument for {}".format(
                        attr, self.__class__.__name__))
            object.__setattr__(fresh, attr, value)
        return fresh

    def __setattr__(self, name, value):
        # Instances are read-only; any direct assignment is an error.
        if hasattr(self, name):
            msg = "{!r} object attribute {!r} is read-only"
        else:
            msg = "{!r} object has no attribute {!r}"
        raise AttributeError(msg.format(self.__class__.__name__, name))

    def __add__(self, other):
        """Non-default values from right operand override those from left.

        The object returned is a new instance of the subclass.

        """
        return self.clone(**other.__dict__)
|
||||
|
||||
|
||||
def _append_doc(doc, added_doc):
    """Join a base docstring and an extension docstring.

    The last line of `doc` (its closing indentation line) and the first
    line of `added_doc` (the line carrying the '+' marker) are dropped
    before joining.
    """
    base = doc.rsplit('\n', 1)[0]
    extension = added_doc.split('\n', 1)[1]
    return '\n'.join((base, extension))
|
||||
|
||||
def _extend_docstrings(cls):
    """Class decorator that would merge '+'-prefixed docstrings with the
    corresponding base-class docstrings.

    NOTE(review): the original body returned ``cls`` immediately, which
    made the entire CPython docstring-merging implementation below it
    unreachable dead code -- presumably disabled on purpose for this port,
    where docstrings may be stripped at runtime.  The dead code has been
    removed; the decorator remains a deliberate no-op with identical
    behavior.
    """
    return cls
|
||||
|
||||
|
||||
class Policy(_PolicyBase):  # NOTE: metaclass=abc.ABCMeta is disabled in this port

    r"""Controls for how messages are interpreted and formatted.

    Most of the classes and many of the methods in the email package
    accept Policy objects as parameters.  A Policy object contains a set
    of values and functions that control how input is interpreted and how
    output is rendered.  For example, 'raise_on_defect' controls whether
    an RFC violation raises an error, while 'max_line_length' controls
    the maximum length of output lines when a Message is serialized.

    Any valid attribute may be overridden when a Policy is created by
    passing it as a keyword argument to the constructor.  Policy objects
    are immutable, but a new Policy object can be created with only
    certain values changed by calling the Policy instance with keyword
    arguments.  Policy objects can also be added, producing a new Policy
    object in which the non-default attributes set in the right hand
    operand overwrite those specified in the left operand.

    Settable attributes:

    raise_on_defect     -- If true, then defects should be raised as
                           errors.  Default: False.

    linesep             -- string containing the value to use as
                           separation between output lines.  Default '\n'.

    cte_type            -- Type of allowed content transfer encodings

                           7bit  -- ASCII only
                           8bit  -- Content-Transfer-Encoding: 8bit is
                                    allowed

                           Default: 8bit.  Also controls the disposition
                           of (RFC invalid) binary data in headers; see
                           the documentation of the binary_fold method.

    max_line_length     -- maximum length of lines, excluding 'linesep',
                           during serialization.  None or 0 means no line
                           wrapping is done.  Default is 78.

    """

    # Defaults; see the class docstring for semantics.
    raise_on_defect = False
    linesep = '\n'
    cte_type = '8bit'
    max_line_length = 78

    def handle_defect(self, obj, defect):
        """Based on policy, either raise defect or call register_defect.

        handle_defect(obj, defect)

        defect should be a Defect subclass, but in any case must be an
        Exception subclass.  obj is the object on which the defect should
        be registered if it is not raised.  If raise_on_defect is True,
        the defect is raised as an error, otherwise the object and the
        defect are passed to register_defect.

        This method is intended to be called by parsers that discover
        defects.  The email package parsers always call it with Defect
        instances.

        """
        if self.raise_on_defect:
            raise defect
        self.register_defect(obj, defect)

    def register_defect(self, obj, defect):
        """Record 'defect' on 'obj'.

        Called by handle_defect if raise_on_defect is False.  This method
        is part of the Policy API so that Policy subclasses can implement
        custom defect handling.  The default implementation appends the
        defect to obj.defects; the objects the email package passes here
        always have a defects attribute with an append method.

        """
        obj.defects.append(defect)

    def header_max_count(self, name):
        """Return the maximum allowed number of headers named 'name'.

        Called when a header is added to a Message object.  If the
        returned value is not 0 or None, and there are already that many
        headers with the name 'name', a ValueError is raised.

        Because the default behavior of Message's __setitem__ is to
        append the value to the list of headers, it is easy to create
        duplicate headers without realizing it.  This method allows
        certain headers to be limited in the number of instances that may
        be added to a Message programmatically.  (The limit is not
        observed by the parser, which will faithfully produce as many
        headers as exist in the message being parsed.)

        The default implementation returns None for all header names.
        """
        return None

    @abc.abstractmethod
    def header_source_parse(self, sourcelines):
        """Given a list of linesep terminated strings constituting the
        lines of a single header, return the (name, value) tuple that
        should be stored in the model.  The input lines should retain
        their terminating linesep characters.  The lines passed in by the
        email package may contain surrogateescaped binary data.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def header_store_parse(self, name, value):
        """Given the header name and the value provided by the
        application program, return the (name, value) that should be
        stored in the model.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def header_fetch_parse(self, name, value):
        """Given the header name and the value from the model, return the
        value to be returned to the application program that is
        requesting that header.  The value passed in by the email package
        may contain surrogateescaped binary data if the lines were parsed
        by a BytesParser.  The returned value should not contain any
        surrogateescaped data.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def fold(self, name, value):
        """Given the header name and the value from the model, return a
        string containing linesep characters that implement the folding
        of the header according to the policy controls.  The value passed
        in by the email package may contain surrogateescaped binary data
        if the lines were parsed by a BytesParser.  The returned value
        should not contain any surrogateescaped data.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def fold_binary(self, name, value):
        """Given the header name and the value from the model, return
        binary data containing linesep characters that implement the
        folding of the header according to the policy controls.  The
        value passed in by the email package may contain surrogateescaped
        binary data.

        """
        raise NotImplementedError
|
||||
|
||||
|
||||
@_extend_docstrings
class Compat32(Policy):

    """+
    This particular policy is the backward compatibility Policy.  It
    replicates the behavior of the email package version 5.1.
    """

    def _sanitize_header(self, name, value):
        # If the header value contains surrogates, return a Header using
        # the unknown-8bit charset to encode the bytes as encoded words.
        if not isinstance(value, str):
            # Assume it is already a header object
            return value
        if not _has_surrogates(value):
            return value
        return header.Header(value, charset=_charset.UNKNOWN8BIT,
                             header_name=name)

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned
        unmodified.  The value is determined by stripping leading
        whitespace off the remainder of the first line, joining all
        subsequent lines together, and stripping any trailing carriage
        return or linefeed characters.

        """
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name and value are returned unmodified.
        """
        return (name, value)

    def header_fetch_parse(self, name, value):
        """+
        If the value contains binary data, it is converted into a Header
        object using the unknown-8bit charset.  Otherwise it is returned
        unmodified.
        """
        return self._sanitize_header(name, value)

    def fold(self, name, value):
        """+
        Headers are folded using the Header folding algorithm, which
        preserves existing line breaks in the value, and wraps each
        resulting line to the max_line_length.  Non-ASCII binary data are
        CTE encoded using the unknown-8bit charset.

        """
        return self._fold(name, value, sanitize=True)

    def fold_binary(self, name, value):
        """+
        Headers are folded using the Header folding algorithm, which
        preserves existing line breaks in the value, and wraps each
        resulting line to the max_line_length.  If cte_type is 7bit,
        non-ascii binary data is CTE encoded using the unknown-8bit
        charset.  Otherwise the original source header is used, with its
        existing line breaks and/or binary data.

        """
        folded = self._fold(name, value, sanitize=self.cte_type == '7bit')
        return folded.encode('ascii', 'surrogateescape')

    def _fold(self, name, value, sanitize):
        # Build "<name>: <folded value><linesep>".
        pieces = ['%s: ' % name]
        hdr = None
        if isinstance(value, str):
            if _has_surrogates(value):
                if sanitize:
                    hdr = header.Header(value,
                                        charset=_charset.UNKNOWN8BIT,
                                        header_name=name)
                else:
                    # If we have raw 8bit data in a byte string, we have
                    # no idea what the encoding is.  There is no safe way
                    # to split this string.  If it's ascii-subset, then we
                    # could do a normal ascii split, but if it's multibyte
                    # then we could break the string.  There's no way to
                    # know so the least harm seems to be to not split the
                    # string and risk it being too long.
                    pieces.append(value)
            else:
                hdr = header.Header(value, header_name=name)
        else:
            # Assume it is a Header-like object.
            hdr = value
        if hdr is not None:
            pieces.append(hdr.encode(linesep=self.linesep,
                                     maxlinelen=self.max_line_length))
        pieces.append(self.linesep)
        return ''.join(pieces)
|
||||
|
||||
|
||||
compat32 = Compat32()
|
|
@ -0,0 +1,119 @@
|
|||
# Copyright (C) 2002-2007 Python Software Foundation
|
||||
# Author: Ben Gertzfield
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Base64 content transfer encoding per RFCs 2045-2047.
|
||||
|
||||
This module handles the content transfer encoding method defined in RFC 2045
|
||||
to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
|
||||
characters encoding known as Base64.
|
||||
|
||||
It is used in the MIME standards for email to attach images, audio, and text
|
||||
using some 8-bit character sets to messages.
|
||||
|
||||
This module provides an interface to encode and decode both headers and bodies
|
||||
with Base64 encoding.
|
||||
|
||||
RFC 2045 defines a method for including character set information in an
|
||||
`encoded-word' in a header. This method is commonly used for 8-bit real names
|
||||
in To:, From:, Cc:, etc. fields, as well as Subject: lines.
|
||||
|
||||
This module does not do the line wrapping or end-of-line character conversion
|
||||
necessary for proper internationalized headers; it only does dumb encoding and
|
||||
decoding. To deal with the various line wrapping issues, use the email.header
|
||||
module.
|
||||
"""
|
||||
|
||||
__all__ = [
|
||||
'body_decode',
|
||||
'body_encode',
|
||||
'decode',
|
||||
'decodestring',
|
||||
'header_encode',
|
||||
'header_length',
|
||||
]
|
||||
|
||||
|
||||
from base64 import b64encode
|
||||
from binascii import b2a_base64, a2b_base64
|
||||
|
||||
CRLF = '\r\n'
|
||||
NL = '\n'
|
||||
EMPTYSTRING = ''
|
||||
|
||||
# See also Charset.py
|
||||
MISC_LEN = 7
|
||||
|
||||
|
||||
|
||||
# Helpers
|
||||
def header_length(bytearray):
    """Return the length of s when it is encoded with base64."""
    # NOTE(review): the parameter name shadows the builtin `bytearray`;
    # it is kept as-is for interface compatibility with keyword callers.
    full_groups, leftover = divmod(len(bytearray), 3)
    # Each 3 input bytes (or any nonzero remainder) become 4 output chars.
    encoded_len = full_groups * 4
    if leftover:
        encoded_len += 4
    return encoded_len
|
||||
|
||||
|
||||
|
||||
def header_encode(header_bytes, charset='iso-8859-1'):
    """Encode a single header line with Base64 encoding in a given charset.

    charset names the character set to use to encode the header.  It
    defaults to iso-8859-1.  Base64 encoding is defined in RFC 2045.
    """
    if not header_bytes:
        return ""
    # Accept either a str (encoded with `charset`) or ready-made bytes.
    if isinstance(header_bytes, str):
        header_bytes = header_bytes.encode(charset)
    b64 = b64encode(header_bytes).decode("ascii")
    return '=?%s?b?%s?=' % (charset, b64)
|
||||
|
||||
|
||||
|
||||
def body_encode(s, maxlinelen=76, eol=NL):
    r"""Encode a string with base64.

    Each line will be wrapped at, at most, maxlinelen characters
    (defaults to 76 characters).

    Each line of encoded text will end with eol, which defaults to "\n".
    Set this to "\r\n" if you will be using the result of this function
    directly in an email.
    """
    if not s:
        return s

    # Encode in chunks small enough that each chunk encodes to at most
    # maxlinelen characters (4 output chars per 3 input bytes).
    max_unencoded = maxlinelen * 3 // 4
    lines = []
    for start in range(0, len(s), max_unencoded):
        chunk = s[start:start + max_unencoded]
        # b2a_base64 always appends '\n'; swap it for the requested eol.
        encoded = b2a_base64(chunk).decode("ascii")
        if eol != NL and encoded.endswith(NL):
            encoded = encoded[:-1] + eol
        lines.append(encoded)
    return EMPTYSTRING.join(lines)
|
||||
|
||||
|
||||
|
||||
def decode(string):
    """Decode a raw base64 string, returning a bytes object.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8895-1?b?bmloISBuaWgh?=) -- please use the high
    level email.header class for that functionality.
    """
    if not string:
        return bytes()
    if isinstance(string, str):
        # A str is assumed to carry raw byte values; recover them
        # losslessly before base64-decoding.
        string = string.encode('raw-unicode-escape')
    return a2b_base64(string)
|
||||
|
||||
|
||||
# For convenience and backwards compatibility w/ standard base64 module
|
||||
body_decode = decode
|
||||
decodestring = decode
|
|
@ -0,0 +1,412 @@
|
|||
# Copyright (C) 2001-2007 Python Software Foundation
|
||||
# Author: Ben Gertzfield, Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
__all__ = [
|
||||
'Charset',
|
||||
'add_alias',
|
||||
'add_charset',
|
||||
'add_codec',
|
||||
]
|
||||
|
||||
from functools import partial
|
||||
|
||||
import email.base64mime
|
||||
import email.quoprimime
|
||||
|
||||
from email import errors
|
||||
from email.encoders import encode_7or8bit
|
||||
|
||||
|
||||
|
||||
# Flags for types of header encodings
|
||||
QP = 1 # Quoted-Printable
|
||||
BASE64 = 2 # Base64
|
||||
SHORTEST = 3 # the shorter of QP and base64, but only for headers
|
||||
|
||||
# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7
|
||||
RFC2047_CHROME_LEN = 7
|
||||
|
||||
DEFAULT_CHARSET = 'us-ascii'
|
||||
UNKNOWN8BIT = 'unknown-8bit'
|
||||
EMPTYSTRING = ''
|
||||
|
||||
|
||||
|
||||
# Defaults
|
||||
CHARSETS = {
|
||||
# input header enc body enc output conv
|
||||
'iso-8859-1': (QP, QP, None),
|
||||
'iso-8859-2': (QP, QP, None),
|
||||
'iso-8859-3': (QP, QP, None),
|
||||
'iso-8859-4': (QP, QP, None),
|
||||
# iso-8859-5 is Cyrillic, and not especially used
|
||||
# iso-8859-6 is Arabic, also not particularly used
|
||||
# iso-8859-7 is Greek, QP will not make it readable
|
||||
# iso-8859-8 is Hebrew, QP will not make it readable
|
||||
'iso-8859-9': (QP, QP, None),
|
||||
'iso-8859-10': (QP, QP, None),
|
||||
# iso-8859-11 is Thai, QP will not make it readable
|
||||
'iso-8859-13': (QP, QP, None),
|
||||
'iso-8859-14': (QP, QP, None),
|
||||
'iso-8859-15': (QP, QP, None),
|
||||
'iso-8859-16': (QP, QP, None),
|
||||
'windows-1252':(QP, QP, None),
|
||||
'viscii': (QP, QP, None),
|
||||
'us-ascii': (None, None, None),
|
||||
'big5': (BASE64, BASE64, None),
|
||||
'gb2312': (BASE64, BASE64, None),
|
||||
'euc-jp': (BASE64, None, 'iso-2022-jp'),
|
||||
'shift_jis': (BASE64, None, 'iso-2022-jp'),
|
||||
'iso-2022-jp': (BASE64, None, None),
|
||||
'koi8-r': (BASE64, BASE64, None),
|
||||
'utf-8': (SHORTEST, BASE64, 'utf-8'),
|
||||
}
|
||||
|
||||
# Aliases for other commonly-used names for character sets. Map
|
||||
# them to the real ones used in email.
|
||||
ALIASES = {
|
||||
'latin_1': 'iso-8859-1',
|
||||
'latin-1': 'iso-8859-1',
|
||||
'latin_2': 'iso-8859-2',
|
||||
'latin-2': 'iso-8859-2',
|
||||
'latin_3': 'iso-8859-3',
|
||||
'latin-3': 'iso-8859-3',
|
||||
'latin_4': 'iso-8859-4',
|
||||
'latin-4': 'iso-8859-4',
|
||||
'latin_5': 'iso-8859-9',
|
||||
'latin-5': 'iso-8859-9',
|
||||
'latin_6': 'iso-8859-10',
|
||||
'latin-6': 'iso-8859-10',
|
||||
'latin_7': 'iso-8859-13',
|
||||
'latin-7': 'iso-8859-13',
|
||||
'latin_8': 'iso-8859-14',
|
||||
'latin-8': 'iso-8859-14',
|
||||
'latin_9': 'iso-8859-15',
|
||||
'latin-9': 'iso-8859-15',
|
||||
'latin_10':'iso-8859-16',
|
||||
'latin-10':'iso-8859-16',
|
||||
'cp949': 'ks_c_5601-1987',
|
||||
'euc_jp': 'euc-jp',
|
||||
'euc_kr': 'euc-kr',
|
||||
'ascii': 'us-ascii',
|
||||
}
|
||||
|
||||
|
||||
# Map charsets to their Unicode codec strings.
|
||||
CODEC_MAP = {
|
||||
'gb2312': 'eucgb2312_cn',
|
||||
'big5': 'big5_tw',
|
||||
# Hack: We don't want *any* conversion for stuff marked us-ascii, as all
|
||||
# sorts of garbage might be sent to us in the guise of 7-bit us-ascii.
|
||||
# Let that stuff pass through without conversion to/from Unicode.
|
||||
'us-ascii': None,
|
||||
}
|
||||
|
||||
|
||||
|
||||
# Convenience functions for extending the above mappings
|
||||
def add_charset(charset, header_enc=None, body_enc=None, output_charset=None):
    """Add character set properties to the global registry.

    charset is the input character set, and must be the canonical name of a
    character set.

    Optional header_enc and body_enc is either Charset.QP for
    quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for
    the shortest of qp or base64 encoding, or None for no encoding.  SHORTEST
    is only valid for header_enc.  It describes how message headers and
    message bodies in the input charset are to be encoded.  Default is no
    encoding.

    Optional output_charset is the character set that the output should be
    in.  Conversions will proceed from input charset, to Unicode, to the
    output charset when the method Charset.convert() is called.  The default
    is to output in the same character set as the input.

    Both input_charset and output_charset must have Unicode codec entries in
    the module's charset-to-codec mapping; use add_codec(charset, codecname)
    to add codecs the module does not know about.  See the codecs module's
    documentation for more information.

    Raises ValueError if body_enc is SHORTEST.
    """
    # SHORTEST only makes sense for headers, where the shorter of the QP
    # and base64 encodings can be chosen per header.
    if body_enc == SHORTEST:
        raise ValueError('SHORTEST not allowed for body_enc')
    CHARSETS[charset] = (header_enc, body_enc, output_charset)
|
||||
|
||||
|
||||
def add_alias(alias, canonical):
    """Add a character set alias.

    alias is the alias name, e.g. latin-1
    canonical is the character set's canonical name, e.g. iso-8859-1
    """
    # Subsequent lookups in ALIASES will map `alias` to `canonical`.
    ALIASES[alias] = canonical
|
||||
|
||||
|
||||
def add_codec(charset, codecname):
    """Add a codec that map characters in the given charset to/from Unicode.

    charset is the canonical name of a character set.  codecname is the name
    of a Python codec, as appropriate for the second argument to the unicode()
    built-in, or to the encode() method of a Unicode string.
    """
    # Registers the codec in the module-level charset-to-codec mapping.
    CODEC_MAP[charset] = codecname
|
||||
|
||||
|
||||
|
||||
# Convenience function for encoding strings, taking into account
|
||||
# that they might be unknown-8bit (ie: have surrogate-escaped bytes)
|
||||
def _encode(string, codec):
    """Encode `string` with `codec`, treating unknown-8bit specially.

    unknown-8bit strings carry raw byte values as surrogates, so they are
    round-tripped back to bytes via the surrogateescape error handler.
    """
    if codec == UNKNOWN8BIT:
        return string.encode('ascii', 'surrogateescape')
    return string.encode(codec)
|
||||
|
||||
|
||||
|
||||
class Charset:
|
||||
"""Map character sets to their email properties.
|
||||
|
||||
This class provides information about the requirements imposed on email
|
||||
for a specific character set. It also provides convenience routines for
|
||||
converting between character sets, given the availability of the
|
||||
applicable codecs. Given a character set, it will do its best to provide
|
||||
information on how to use that character set in an email in an
|
||||
RFC-compliant way.
|
||||
|
||||
Certain character sets must be encoded with quoted-printable or base64
|
||||
when used in email headers or bodies. Certain character sets must be
|
||||
converted outright, and are not allowed in email. Instances of this
|
||||
module expose the following information about a character set:
|
||||
|
||||
input_charset: The initial character set specified. Common aliases
|
||||
are converted to their `official' email names (e.g. latin_1
|
||||
is converted to iso-8859-1). Defaults to 7-bit us-ascii.
|
||||
|
||||
header_encoding: If the character set must be encoded before it can be
|
||||
used in an email header, this attribute will be set to
|
||||
Charset.QP (for quoted-printable), Charset.BASE64 (for
|
||||
base64 encoding), or Charset.SHORTEST for the shortest of
|
||||
QP or BASE64 encoding. Otherwise, it will be None.
|
||||
|
||||
body_encoding: Same as header_encoding, but describes the encoding for the
|
||||
mail message's body, which indeed may be different than the
|
||||
header encoding. Charset.SHORTEST is not allowed for
|
||||
body_encoding.
|
||||
|
||||
output_charset: Some character sets must be converted before they can be
|
||||
used in email headers or bodies. If the input_charset is
|
||||
one of them, this attribute will contain the name of the
|
||||
charset output will be converted to. Otherwise, it will
|
||||
be None.
|
||||
|
||||
input_codec: The name of the Python codec used to convert the
|
||||
input_charset to Unicode. If no conversion codec is
|
||||
necessary, this attribute will be None.
|
||||
|
||||
output_codec: The name of the Python codec used to convert Unicode
|
||||
to the output_charset. If no conversion codec is necessary,
|
||||
this attribute will have the same value as the input_codec.
|
||||
"""
|
||||
def __init__(self, input_charset=DEFAULT_CHARSET):
    """Initialize the charset from its (possibly aliased) name.

    Raises errors.CharsetError if the name is not pure ASCII.
    """
    # RFC 2046, $4.1.2 says charsets are not case sensitive.  We coerce to
    # unicode because its .lower() is locale insensitive.  If the argument
    # is already a unicode, we leave it at that, but ensure that the
    # charset is ASCII, as the standard (RFC XXX) requires.
    try:
        if isinstance(input_charset, str):
            # Round-trip through ASCII only to validate; value unchanged.
            input_charset.encode('ascii')
        else:
            # bytes (or bytes-like) input: decode, enforcing ASCII.
            input_charset = str(input_charset, 'ascii')
    except UnicodeError:
        raise errors.CharsetError(input_charset)
    input_charset = input_charset.lower()
    # Set the input charset after filtering through the aliases
    self.input_charset = ALIASES.get(input_charset, input_charset)
    # We can try to guess which encoding and conversion to use by the
    # charset_map dictionary.  Try that first, but let the user override
    # it.
    henc, benc, conv = CHARSETS.get(self.input_charset,
                                    (SHORTEST, BASE64, None))
    if not conv:
        # No forced output conversion: output charset equals input charset.
        conv = self.input_charset
    # Set the attributes, allowing the arguments to override the default.
    self.header_encoding = henc
    self.body_encoding = benc
    self.output_charset = ALIASES.get(conv, conv)
    # Now set the codecs.  If one isn't defined for input_charset,
    # guess and try a Unicode codec with the same name as input_codec.
    self.input_codec = CODEC_MAP.get(self.input_charset,
                                     self.input_charset)
    self.output_codec = CODEC_MAP.get(self.output_charset,
                                      self.output_charset)
|
||||
|
||||
def __str__(self):
    """Return the canonical, lowercased name of the input charset."""
    name = self.input_charset
    return name.lower()

# The repr of a charset is simply its canonical name.
__repr__ = __str__
|
||||
|
||||
def __eq__(self, other):
    # Charsets compare by canonical (lowercased) name, so any object
    # with a sensible str() can be compared against a Charset.
    mine = str(self)
    theirs = str(other).lower()
    return mine == theirs
|
||||
|
||||
def __ne__(self, other):
    # Explicitly the negation of __eq__, keeping the two consistent.
    is_equal = self.__eq__(other)
    return not is_equal
|
||||
|
||||
def get_body_encoding(self):
    """Return the content-transfer-encoding used for body encoding.

    This is either the string `quoted-printable' or `base64' depending on
    the encoding used, or it is a function in which case you should call
    the function with a single argument, the Message object being
    encoded.  The function should then set the Content-Transfer-Encoding
    header itself to whatever is appropriate.

    Returns "quoted-printable" if self.body_encoding is QP.
    Returns "base64" if self.body_encoding is BASE64.
    Returns conversion function otherwise.
    """
    # SHORTEST is only meaningful for headers, never for bodies.
    assert self.body_encoding != SHORTEST
    if self.body_encoding == QP:
        return 'quoted-printable'
    if self.body_encoding == BASE64:
        return 'base64'
    # No fixed encoding: hand back the 7bit/8bit CTE-setting function.
    return encode_7or8bit
|
||||
|
||||
def get_output_charset(self):
    """Return the output character set.

    This is self.output_charset if that is not None, otherwise it is
    self.input_charset.
    """
    if self.output_charset:
        return self.output_charset
    return self.input_charset
|
||||
|
||||
def header_encode(self, string):
    """Header-encode a string by converting it first to bytes.

    The type of encoding (base64 or quoted-printable) will be based on
    this charset's `header_encoding`.

    :param string: A unicode string for the header.  It must be possible
        to encode this string to bytes using the character set's
        output codec.
    :return: The encoded string, with RFC 2047 chrome.
    """
    codec = self.output_codec or 'us-ascii'
    header_bytes = _encode(string, codec)
    # 7bit/8bit encodings return the string unchanged (modulo conversions)
    encoder_module = self._get_encoder(header_bytes)
    if encoder_module is None:
        return string
    return encoder_module.header_encode(header_bytes, codec)
|
||||
|
||||
def header_encode_lines(self, string, maxlengths):
    """Header-encode a string by converting it first to bytes.

    This is similar to `header_encode()` except that the string is fit
    into maximum line lengths as given by the argument.

    :param string: A unicode string for the header.  It must be possible
        to encode this string to bytes using the character set's
        output codec.
    :param maxlengths: Maximum line length iterator.  Each element
        returned from this iterator will provide the next maximum line
        length.  This parameter is used as an argument to built-in next()
        and should never be exhausted.  The maximum line lengths should
        not count the RFC 2047 chrome.  These line lengths are only a
        hint; the splitter does the best it can.
    :return: Lines of encoded strings, each with RFC 2047 chrome.
    """
    # See which encoding we should use.
    codec = self.output_codec or 'us-ascii'
    header_bytes = _encode(string, codec)
    encoder_module = self._get_encoder(header_bytes)
    encoder = partial(encoder_module.header_encode, charset=codec)
    # Calculate the number of characters that the RFC 2047 chrome will
    # contribute to each line.
    charset = self.get_output_charset()
    extra = len(charset) + RFC2047_CHROME_LEN
    # Now comes the hard part.  We must encode bytes but we can't split on
    # bytes because some character sets are variable length and each
    # encoded word must stand on its own.  So the problem is you have to
    # encode to bytes to figure out this word's length, but you must split
    # on characters.  This causes two problems: first, we don't know how
    # many octets a specific substring of unicode characters will get
    # encoded to, and second, we don't know how many ASCII characters
    # those octets will get encoded to.  Unless we try it.  Which seems
    # inefficient.  In the interest of being correct rather than fast (and
    # in the hope that there will be few encoded headers in any such
    # message), brute force it. :(
    lines = []
    current_line = []
    maxlen = next(maxlengths) - extra
    for character in string:
        current_line.append(character)
        this_line = EMPTYSTRING.join(current_line)
        length = encoder_module.header_length(_encode(this_line, charset))
        if length > maxlen:
            # This last character doesn't fit so pop it off.
            current_line.pop()
            # Does nothing fit on the first line?
            if not lines and not current_line:
                # None is a sentinel telling the caller the first chunk
                # must go on a continuation line by itself.
                lines.append(None)
            else:
                # NOTE(review): `separator` is computed but never used
                # below — looks vestigial; confirm before removing.
                separator = (' ' if lines else '')
                joined_line = EMPTYSTRING.join(current_line)
                header_bytes = _encode(joined_line, codec)
                lines.append(encoder(header_bytes))
            # Start the next line with the character that overflowed.
            current_line = [character]
            maxlen = next(maxlengths) - extra
    # Flush whatever is left in the final (possibly empty) line.
    joined_line = EMPTYSTRING.join(current_line)
    header_bytes = _encode(joined_line, codec)
    lines.append(encoder(header_bytes))
    return lines
|
||||
|
||||
def _get_encoder(self, header_bytes):
    # Map this charset's header_encoding policy onto the module that
    # implements it; None means the header needs no encoding at all.
    if self.header_encoding == BASE64:
        return email.base64mime
    if self.header_encoding == QP:
        return email.quoprimime
    if self.header_encoding == SHORTEST:
        # Pick whichever of the two encodings yields the shorter header.
        len64 = email.base64mime.header_length(header_bytes)
        lenqp = email.quoprimime.header_length(header_bytes)
        return email.base64mime if len64 < lenqp else email.quoprimime
    return None
|
||||
|
||||
def body_encode(self, string):
    """Body-encode a string by converting it first to bytes.

    The type of encoding (base64 or quoted-printable) will be based on
    self.body_encoding.  If body_encoding is None, we assume the
    output charset is a 7bit encoding, so re-encoding the decoded
    string using the ascii codec produces the correct string version
    of the content.
    """
    # 7bit/8bit encodings return the string unchanged (modulo conversions)
    if self.body_encoding is BASE64:
        if isinstance(string, str):
            string = string.encode(self.output_charset)
        return email.base64mime.body_encode(string)
    elif self.body_encoding is QP:
        # quopromime.body_encode takes a string, but operates on it as if
        # it were a list of byte codes.  For a (minimal) history on why
        # this is so, see changeset 0cf700464177.  To correctly encode a
        # character set, then, we must turn it into pseudo bytes via the
        # latin1 charset, which will encode any byte as a single code point
        # between 0 and 255, which is what body_encode is expecting.
        #
        # Note that this clause doesn't handle the case of a _payload that
        # is already bytes.  It never did, and the semantics of _payload
        # being bytes has never been nailed down, so fixing that is a
        # longer term TODO.
        if isinstance(string, str):
            string = string.encode(self.output_charset).decode('latin1')
        return email.quoprimime.body_encode(string)
    else:
        # No CTE needed: re-encode/decode via ascii purely to validate
        # that the content really is 7bit clean.
        if isinstance(string, str):
            string = string.encode(self.output_charset).decode('ascii')
        return string
|
|
@ -0,0 +1,78 @@
|
|||
# Copyright (C) 2001-2006 Python Software Foundation
|
||||
# Author: Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Encodings and related functions."""
|
||||
|
||||
__all__ = [
|
||||
'encode_7or8bit',
|
||||
'encode_base64',
|
||||
'encode_noop',
|
||||
'encode_quopri',
|
||||
]
|
||||
|
||||
|
||||
from base64 import encodebytes as _bencode
|
||||
from quopri import encodestring as _encodestring
|
||||
|
||||
|
||||
|
||||
def _qencode(s):
    """Quoted-printable-encode *s*, additionally encoding spaces."""
    encoded = _encodestring(s, quotetabs=True)
    # Must encode spaces, which quopri.encodestring() doesn't do
    return encoded.replace(b' ', b'=20')
|
||||
|
||||
|
||||
def encode_base64(msg):
    """Encode the message's payload in Base64.

    Also, add an appropriate Content-Transfer-Encoding header.
    """
    payload = msg.get_payload(decode=True)
    # encodebytes returns bytes; the payload is stored as an ASCII str.
    msg.set_payload(str(_bencode(payload), 'ascii'))
    msg['Content-Transfer-Encoding'] = 'base64'
|
||||
|
||||
|
||||
|
||||
def encode_quopri(msg):
    """Encode the message's payload in quoted-printable.

    Also, add an appropriate Content-Transfer-Encoding header.
    """
    payload = msg.get_payload(decode=True)
    msg.set_payload(_qencode(payload))
    msg['Content-Transfer-Encoding'] = 'quoted-printable'
|
||||
|
||||
|
||||
|
||||
def encode_7or8bit(msg):
    """Set the Content-Transfer-Encoding header to 7bit or 8bit."""
    payload = msg.get_payload(decode=True)
    if payload is None:
        # There's no payload.  For backwards compatibility we use 7bit
        msg['Content-Transfer-Encoding'] = '7bit'
        return
    # We play a trick to make this go fast.  If encoding/decode to ASCII
    # succeeds, we know the data must be 7bit, otherwise treat it as 8bit.
    try:
        if isinstance(payload, str):
            payload.encode('ascii')
        else:
            payload.decode('ascii')
    except UnicodeError:
        charset = msg.get_charset()
        output_cset = charset and charset.output_charset
        # iso-2022-* is non-ASCII but encodes to a 7-bit representation
        is_iso2022 = bool(output_cset) and \
            output_cset.lower().startswith('iso-2022-')
        msg['Content-Transfer-Encoding'] = '7bit' if is_iso2022 else '8bit'
    else:
        msg['Content-Transfer-Encoding'] = '7bit'
|
||||
|
||||
|
||||
|
||||
def encode_noop(msg):
    """Do nothing."""
    # Intentionally a no-op: leaves the payload and all headers untouched.
|
@ -0,0 +1,107 @@
|
|||
# Copyright (C) 2001-2006 Python Software Foundation
|
||||
# Author: Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""email package exception classes."""
|
||||
|
||||
|
||||
class MessageError(Exception):
|
||||
"""Base class for errors in the email package."""
|
||||
|
||||
|
||||
class MessageParseError(MessageError):
|
||||
"""Base class for message parsing errors."""
|
||||
|
||||
|
||||
class HeaderParseError(MessageParseError):
|
||||
"""Error while parsing headers."""
|
||||
|
||||
|
||||
class BoundaryError(MessageParseError):
|
||||
"""Couldn't find terminating boundary."""
|
||||
|
||||
|
||||
class MultipartConversionError(MessageError):#, TypeError):
|
||||
"""Conversion to a multipart is prohibited."""
|
||||
|
||||
|
||||
class CharsetError(MessageError):
|
||||
"""An illegal charset was given."""
|
||||
|
||||
|
||||
# These are parsing defects which the parser was able to work around.
|
||||
class MessageDefect(ValueError):
    """Base class for a message defect."""

    def __init__(self, line=None):
        # Record the offending input line (when given) both as the
        # exception argument and as an attribute for later inspection.
        if line is not None:
            super().__init__(line)
        self.line = line
|
||||
|
||||
class NoBoundaryInMultipartDefect(MessageDefect):
|
||||
"""A message claimed to be a multipart but had no boundary parameter."""
|
||||
|
||||
class StartBoundaryNotFoundDefect(MessageDefect):
|
||||
"""The claimed start boundary was never found."""
|
||||
|
||||
class CloseBoundaryNotFoundDefect(MessageDefect):
|
||||
"""A start boundary was found, but not the corresponding close boundary."""
|
||||
|
||||
class FirstHeaderLineIsContinuationDefect(MessageDefect):
|
||||
"""A message had a continuation line as its first header line."""
|
||||
|
||||
class MisplacedEnvelopeHeaderDefect(MessageDefect):
|
||||
"""A 'Unix-from' header was found in the middle of a header block."""
|
||||
|
||||
class MissingHeaderBodySeparatorDefect(MessageDefect):
|
||||
"""Found line with no leading whitespace and no colon before blank line."""
|
||||
# XXX: backward compatibility, just in case (it was never emitted).
|
||||
MalformedHeaderDefect = MissingHeaderBodySeparatorDefect
|
||||
|
||||
class MultipartInvariantViolationDefect(MessageDefect):
|
||||
"""A message claimed to be a multipart but no subparts were found."""
|
||||
|
||||
class InvalidMultipartContentTransferEncodingDefect(MessageDefect):
|
||||
"""An invalid content transfer encoding was set on the multipart itself."""
|
||||
|
||||
class UndecodableBytesDefect(MessageDefect):
|
||||
"""Header contained bytes that could not be decoded"""
|
||||
|
||||
class InvalidBase64PaddingDefect(MessageDefect):
|
||||
"""base64 encoded sequence had an incorrect length"""
|
||||
|
||||
class InvalidBase64CharactersDefect(MessageDefect):
|
||||
"""base64 encoded sequence had characters not in base64 alphabet"""
|
||||
|
||||
# These errors are specific to header parsing.
|
||||
|
||||
class HeaderDefect(MessageDefect):
    """Base class for a header defect."""

    def __init__(self, *args, **kw):
        # Pure pass-through; exists so all header defects share one root
        # class that callers can catch or test with isinstance().
        super().__init__(*args, **kw)
|
||||
class InvalidHeaderDefect(HeaderDefect):
|
||||
"""Header is not valid, message gives details."""
|
||||
|
||||
class HeaderMissingRequiredValue(HeaderDefect):
|
||||
"""A header that must have a value had none"""
|
||||
|
||||
class NonPrintableDefect(HeaderDefect):
    """ASCII characters outside the ascii-printable range found"""

    def __init__(self, non_printables):
        super().__init__(non_printables)
        # Keep the offending characters available to callers.
        self.non_printables = non_printables

    def __str__(self):
        return ("the following ASCII non-printables found in header: "
                "{}".format(self.non_printables))
|
||||
|
||||
class ObsoleteHeaderDefect(HeaderDefect):
|
||||
"""Header uses syntax declared obsolete by RFC 5322"""
|
||||
|
||||
class NonASCIILocalPartDefect(HeaderDefect):
|
||||
"""local_part contains non-ASCII characters"""
|
||||
# This defect only occurs during unicode parsing, not when
|
||||
# parsing messages decoded from binary.
|
|
@ -0,0 +1,516 @@
|
|||
# Copyright (C) 2004-2006 Python Software Foundation
|
||||
# Authors: Baxter, Wouters and Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""FeedParser - An email feed parser.
|
||||
|
||||
The feed parser implements an interface for incrementally parsing an email
|
||||
message, line by line. This has advantages for certain applications, such as
|
||||
those reading email messages off a socket.
|
||||
|
||||
FeedParser.feed() is the primary interface for pushing new data into the
|
||||
parser. It returns when there's nothing more it can do with the available
|
||||
data. When you have no more data to push into the parser, call .close().
|
||||
This completes the parsing and returns the root message object.
|
||||
|
||||
The other advantage of this parser is that it will never raise a parsing
|
||||
exception. Instead, when it finds something unexpected, it adds a 'defect' to
|
||||
the current message. Defects are just instances that live on the message
|
||||
object's .defects attribute.
|
||||
"""
|
||||
|
||||
__all__ = ['FeedParser', 'BytesFeedParser']
|
||||
|
||||
import re
|
||||
|
||||
from email import errors
|
||||
from email import message
|
||||
from email._policybase import compat32
|
||||
|
||||
NLCRE = re.compile('\r\n|\r|\n')
|
||||
NLCRE_bol = re.compile('(\r\n|\r|\n)')
|
||||
NLCRE_eol = re.compile('(\r\n|\r|\n)\Z')
|
||||
NLCRE_crack = re.compile('(\r\n|\r|\n)')
|
||||
# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character
|
||||
# except controls, SP, and ":".
|
||||
headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])')
|
||||
EMPTYSTRING = ''
|
||||
NL = '\n'
|
||||
|
||||
NeedMoreData = object()
|
||||
|
||||
|
||||
|
||||
class BufferedSubFile(object):
    """A file-ish object that can have new data loaded into it.

    You can also push and pop line-matching predicates onto a stack.  When the
    current predicate matches the current line, a false EOF response
    (i.e. empty string) is returned instead.  This lets the parser adhere to a
    simple abstraction -- it parses until EOF closes the current message.
    """
    def __init__(self):
        # The last partial line pushed into this object.
        self._partial = ''
        # The list of full, pushed lines, in reverse order
        self._lines = []
        # The stack of false-EOF checking predicates.
        self._eofstack = []
        # A flag indicating whether the file has been closed or not.
        self._closed = False

    def push_eof_matcher(self, pred):
        # Predicates are consulted most-recently-pushed first (see readline).
        self._eofstack.append(pred)

    def pop_eof_matcher(self):
        return self._eofstack.pop()

    def close(self):
        # Don't forget any trailing partial line.
        self._lines.append(self._partial)
        self._partial = ''
        self._closed = True

    def readline(self):
        # Returns '' at (true or false) EOF, NeedMoreData when the caller
        # must push() more input, otherwise the next buffered line.
        if not self._lines:
            if self._closed:
                return ''
            return NeedMoreData
        # Pop the line off the stack and see if it matches the current
        # false-EOF predicate.
        line = self._lines.pop()
        # RFC 2046, section 5.1.2 requires us to recognize outer level
        # boundaries at any level of inner nesting.  Do this, but be sure it's
        # in the order of most to least nested.
        for ateof in self._eofstack[::-1]:
            if ateof(line):
                # We're at the false EOF.  But push the last line back first.
                self._lines.append(line)
                return ''
        return line

    def unreadline(self, line):
        # Let the consumer push a line back into the buffer.
        assert line is not NeedMoreData
        self._lines.append(line)

    def push(self, data):
        """Push some new data into this object."""
        # Handle any previous leftovers
        data, self._partial = self._partial + data, ''
        # Crack into lines, but preserve the newlines on the end of each
        parts = NLCRE_crack.split(data)
        # The *ahem* interesting behaviour of re.split when supplied grouping
        # parentheses is that the last element of the resulting list is the
        # data after the final RE.  In the case of a NL/CR terminated string,
        # this is the empty string.
        self._partial = parts.pop()
        #GAN 29Mar09  bugs 1555570, 1721862  Confusion at 8K boundary ending with \r:
        # is there a \n to follow later?
        if not self._partial and parts and parts[-1].endswith('\r'):
            # Hold the trailing '\r' (plus its line) back until we know
            # whether a '\n' arrives in the next chunk.
            self._partial = parts.pop(-2)+parts.pop()
        # parts is a list of strings, alternating between the line contents
        # and the eol character(s).  Gather up a list of lines after
        # re-attaching the newlines.
        lines = []
        for i in range(len(parts) // 2):
            lines.append(parts[i*2] + parts[i*2+1])
        self.pushlines(lines)

    def pushlines(self, lines):
        # Reverse and insert at the front of the lines.
        self._lines[:0] = lines[::-1]

    def __iter__(self):
        return self

    def __next__(self):
        line = self.readline()
        if line == '':
            # Either true EOF (closed) or a false EOF from a predicate.
            raise StopIteration
        return line
|
||||
|
||||
|
||||
|
||||
class FeedParser:
|
||||
"""A feed-style parser of email."""
|
||||
|
||||
def __init__(self, _factory=message.Message, policy=compat32):
|
||||
"""_factory is called with no arguments to create a new message obj
|
||||
|
||||
The policy keyword specifies a policy object that controls a number of
|
||||
aspects of the parser's operation. The default policy maintains
|
||||
backward compatibility.
|
||||
|
||||
"""
|
||||
self._factory = _factory
|
||||
self.policy = policy
|
||||
try:
|
||||
_factory(policy=self.policy)
|
||||
self._factory_kwds = lambda: {'policy': self.policy}
|
||||
except TypeError:
|
||||
# Assume this is an old-style factory
|
||||
self._factory_kwds = lambda: {}
|
||||
self._input = BufferedSubFile()
|
||||
self._msgstack = []
|
||||
self._parse = self._parsegen().__next__
|
||||
self._cur = None
|
||||
self._last = None
|
||||
self._headersonly = False
|
||||
|
||||
# Non-public interface for supporting Parser's headersonly flag
|
||||
def _set_headersonly(self):
|
||||
self._headersonly = True
|
||||
|
||||
def feed(self, data):
|
||||
"""Push more data into the parser."""
|
||||
self._input.push(data)
|
||||
self._call_parse()
|
||||
|
||||
def _call_parse(self):
|
||||
try:
|
||||
self._parse()
|
||||
except StopIteration:
|
||||
pass
|
||||
|
||||
def close(self):
|
||||
"""Parse all remaining data and return the root message object."""
|
||||
self._input.close()
|
||||
self._call_parse()
|
||||
root = self._pop_message()
|
||||
assert not self._msgstack
|
||||
# Look for final set of defects
|
||||
if root.get_content_maintype() == 'multipart' \
|
||||
and not root.is_multipart():
|
||||
defect = errors.MultipartInvariantViolationDefect()
|
||||
self.policy.handle_defect(root, defect)
|
||||
return root
|
||||
|
||||
def _new_message(self):
|
||||
msg = self._factory(**self._factory_kwds())
|
||||
if self._cur and self._cur.get_content_type() == 'multipart/digest':
|
||||
msg.set_default_type('message/rfc822')
|
||||
if self._msgstack:
|
||||
self._msgstack[-1].attach(msg)
|
||||
self._msgstack.append(msg)
|
||||
self._cur = msg
|
||||
self._last = msg
|
||||
|
||||
def _pop_message(self):
|
||||
retval = self._msgstack.pop()
|
||||
if self._msgstack:
|
||||
self._cur = self._msgstack[-1]
|
||||
else:
|
||||
self._cur = None
|
||||
return retval
|
||||
|
||||
def _parsegen(self):
|
||||
# Create a new message and start by parsing headers.
|
||||
self._new_message()
|
||||
headers = []
|
||||
# Collect the headers, searching for a line that doesn't match the RFC
|
||||
# 2822 header or continuation pattern (including an empty line).
|
||||
for line in self._input:
|
||||
if line is NeedMoreData:
|
||||
yield NeedMoreData
|
||||
continue
|
||||
if not headerRE.match(line):
|
||||
# If we saw the RFC defined header/body separator
|
||||
# (i.e. newline), just throw it away. Otherwise the line is
|
||||
# part of the body so push it back.
|
||||
if not NLCRE.match(line):
|
||||
defect = errors.MissingHeaderBodySeparatorDefect()
|
||||
self.policy.handle_defect(self._cur, defect)
|
||||
self._input.unreadline(line)
|
||||
break
|
||||
headers.append(line)
|
||||
# Done with the headers, so parse them and figure out what we're
|
||||
# supposed to see in the body of the message.
|
||||
self._parse_headers(headers)
|
||||
# Headers-only parsing is a backwards compatibility hack, which was
|
||||
# necessary in the older parser, which could raise errors. All
|
||||
# remaining lines in the input are thrown into the message body.
|
||||
if self._headersonly:
|
||||
lines = []
|
||||
while True:
|
||||
line = self._input.readline()
|
||||
if line is NeedMoreData:
|
||||
yield NeedMoreData
|
||||
continue
|
||||
if line == '':
|
||||
break
|
||||
lines.append(line)
|
||||
self._cur.set_payload(EMPTYSTRING.join(lines))
|
||||
return
|
||||
if self._cur.get_content_type() == 'message/delivery-status':
|
||||
# message/delivery-status contains blocks of headers separated by
|
||||
# a blank line. We'll represent each header block as a separate
|
||||
# nested message object, but the processing is a bit different
|
||||
# than standard message/* types because there is no body for the
|
||||
# nested messages. A blank line separates the subparts.
|
||||
while True:
|
||||
self._input.push_eof_matcher(NLCRE.match)
|
||||
for retval in self._parsegen():
|
||||
if retval is NeedMoreData:
|
||||
yield NeedMoreData
|
||||
continue
|
||||
break
|
||||
msg = self._pop_message()
|
||||
# We need to pop the EOF matcher in order to tell if we're at
|
||||
# the end of the current file, not the end of the last block
|
||||
# of message headers.
|
||||
self._input.pop_eof_matcher()
|
||||
# The input stream must be sitting at the newline or at the
|
||||
# EOF. We want to see if we're at the end of this subpart, so
|
||||
# first consume the blank line, then test the next line to see
|
||||
# if we're at this subpart's EOF.
|
||||
while True:
|
||||
line = self._input.readline()
|
||||
if line is NeedMoreData:
|
||||
yield NeedMoreData
|
||||
continue
|
||||
break
|
||||
while True:
|
||||
line = self._input.readline()
|
||||
if line is NeedMoreData:
|
||||
yield NeedMoreData
|
||||
continue
|
||||
break
|
||||
if line == '':
|
||||
break
|
||||
# Not at EOF so this is a line we're going to need.
|
||||
self._input.unreadline(line)
|
||||
return
|
||||
if self._cur.get_content_maintype() == 'message':
|
||||
# The message claims to be a message/* type, then what follows is
|
||||
# another RFC 2822 message.
|
||||
for retval in self._parsegen():
|
||||
if retval is NeedMoreData:
|
||||
yield NeedMoreData
|
||||
continue
|
||||
break
|
||||
self._pop_message()
|
||||
return
|
||||
if self._cur.get_content_maintype() == 'multipart':
|
||||
boundary = self._cur.get_boundary()
|
||||
if boundary is None:
|
||||
# The message /claims/ to be a multipart but it has not
|
||||
# defined a boundary. That's a problem which we'll handle by
|
||||
# reading everything until the EOF and marking the message as
|
||||
# defective.
|
||||
defect = errors.NoBoundaryInMultipartDefect()
|
||||
self.policy.handle_defect(self._cur, defect)
|
||||
lines = []
|
||||
for line in self._input:
|
||||
if line is NeedMoreData:
|
||||
yield NeedMoreData
|
||||
continue
|
||||
lines.append(line)
|
||||
self._cur.set_payload(EMPTYSTRING.join(lines))
|
||||
return
|
||||
# Make sure a valid content type was specified per RFC 2045:6.4.
|
||||
if (self._cur.get('content-transfer-encoding', '8bit').lower()
|
||||
not in ('7bit', '8bit', 'binary')):
|
||||
defect = errors.InvalidMultipartContentTransferEncodingDefect()
|
||||
self.policy.handle_defect(self._cur, defect)
|
||||
# Create a line match predicate which matches the inter-part
|
||||
# boundary as well as the end-of-multipart boundary. Don't push
|
||||
# this onto the input stream until we've scanned past the
|
||||
# preamble.
|
||||
separator = '--' + boundary
|
||||
boundaryre = re.compile(
|
||||
'(?P<sep>' + re.escape(separator) +
|
||||
r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
|
||||
capturing_preamble = True
|
||||
preamble = []
|
||||
linesep = False
|
||||
close_boundary_seen = False
|
||||
while True:
|
||||
line = self._input.readline()
|
||||
if line is NeedMoreData:
|
||||
yield NeedMoreData
|
||||
continue
|
||||
if line == '':
|
||||
break
|
||||
mo = boundaryre.match(line)
|
||||
if mo:
|
||||
# If we're looking at the end boundary, we're done with
|
||||
# this multipart. If there was a newline at the end of
|
||||
# the closing boundary, then we need to initialize the
|
||||
# epilogue with the empty string (see below).
|
||||
if mo.group('end'):
|
||||
close_boundary_seen = True
|
||||
linesep = mo.group('linesep')
|
||||
break
|
||||
# We saw an inter-part boundary. Were we in the preamble?
|
||||
if capturing_preamble:
|
||||
if preamble:
|
||||
# According to RFC 2046, the last newline belongs
|
||||
# to the boundary.
|
||||
lastline = preamble[-1]
|
||||
eolmo = NLCRE_eol.search(lastline)
|
||||
if eolmo:
|
||||
preamble[-1] = lastline[:-len(eolmo.group(0))]
|
||||
self._cur.preamble = EMPTYSTRING.join(preamble)
|
||||
capturing_preamble = False
|
||||
self._input.unreadline(line)
|
||||
continue
|
||||
# We saw a boundary separating two parts. Consume any
|
||||
# multiple boundary lines that may be following. Our
|
||||
# interpretation of RFC 2046 BNF grammar does not produce
|
||||
# body parts within such double boundaries.
|
||||
while True:
|
||||
line = self._input.readline()
|
||||
if line is NeedMoreData:
|
||||
yield NeedMoreData
|
||||
continue
|
||||
mo = boundaryre.match(line)
|
||||
if not mo:
|
||||
self._input.unreadline(line)
|
||||
break
|
||||
# Recurse to parse this subpart; the input stream points
|
||||
# at the subpart's first line.
|
||||
self._input.push_eof_matcher(boundaryre.match)
|
||||
for retval in self._parsegen():
|
||||
if retval is NeedMoreData:
|
||||
yield NeedMoreData
|
||||
continue
|
||||
break
|
||||
# Because of RFC 2046, the newline preceding the boundary
|
||||
# separator actually belongs to the boundary, not the
|
||||
# previous subpart's payload (or epilogue if the previous
|
||||
# part is a multipart).
|
||||
if self._last.get_content_maintype() == 'multipart':
|
||||
epilogue = self._last.epilogue
|
||||
if epilogue == '':
|
||||
self._last.epilogue = None
|
||||
elif epilogue is not None:
|
||||
mo = NLCRE_eol.search(epilogue)
|
||||
if mo:
|
||||
end = len(mo.group(0))
|
||||
self._last.epilogue = epilogue[:-end]
|
||||
else:
|
||||
payload = self._last._payload
|
||||
if isinstance(payload, str):
|
||||
mo = NLCRE_eol.search(payload)
|
||||
if mo:
|
||||
payload = payload[:-len(mo.group(0))]
|
||||
self._last._payload = payload
|
||||
self._input.pop_eof_matcher()
|
||||
self._pop_message()
|
||||
# Set the multipart up for newline cleansing, which will
|
||||
# happen if we're in a nested multipart.
|
||||
self._last = self._cur
|
||||
else:
|
||||
# I think we must be in the preamble
|
||||
assert capturing_preamble
|
||||
preamble.append(line)
|
||||
# We've seen either the EOF or the end boundary. If we're still
|
||||
# capturing the preamble, we never saw the start boundary. Note
|
||||
# that as a defect and store the captured text as the payload.
|
||||
if capturing_preamble:
|
||||
defect = errors.StartBoundaryNotFoundDefect()
|
||||
self.policy.handle_defect(self._cur, defect)
|
||||
self._cur.set_payload(EMPTYSTRING.join(preamble))
|
||||
epilogue = []
|
||||
for line in self._input:
|
||||
if line is NeedMoreData:
|
||||
yield NeedMoreData
|
||||
continue
|
||||
self._cur.epilogue = EMPTYSTRING.join(epilogue)
|
||||
return
|
||||
# If we're not processing the preamble, then we might have seen
|
||||
# EOF without seeing that end boundary...that is also a defect.
|
||||
if not close_boundary_seen:
|
||||
defect = errors.CloseBoundaryNotFoundDefect()
|
||||
self.policy.handle_defect(self._cur, defect)
|
||||
return
|
||||
# Everything from here to the EOF is epilogue. If the end boundary
|
||||
# ended in a newline, we'll need to make sure the epilogue isn't
|
||||
# None
|
||||
if linesep:
|
||||
epilogue = ['']
|
||||
else:
|
||||
epilogue = []
|
||||
for line in self._input:
|
||||
if line is NeedMoreData:
|
||||
yield NeedMoreData
|
||||
continue
|
||||
epilogue.append(line)
|
||||
# Any CRLF at the front of the epilogue is not technically part of
|
||||
# the epilogue. Also, watch out for an empty string epilogue,
|
||||
# which means a single newline.
|
||||
if epilogue:
|
||||
firstline = epilogue[0]
|
||||
bolmo = NLCRE_bol.match(firstline)
|
||||
if bolmo:
|
||||
epilogue[0] = firstline[len(bolmo.group(0)):]
|
||||
self._cur.epilogue = EMPTYSTRING.join(epilogue)
|
||||
return
|
||||
# Otherwise, it's some non-multipart type, so the entire rest of the
|
||||
# file contents becomes the payload.
|
||||
lines = []
|
||||
for line in self._input:
|
||||
if line is NeedMoreData:
|
||||
yield NeedMoreData
|
||||
continue
|
||||
lines.append(line)
|
||||
self._cur.set_payload(EMPTYSTRING.join(lines))
|
||||
|
||||
def _parse_headers(self, lines):
    """Parse a complete block of raw header lines into the current message.

    ``lines`` is the list of header source lines (line endings included)
    accumulated by the feed parser.  Each logical header (including its
    continuation lines) is handed to ``self.policy.header_source_parse`` and
    stored on ``self._cur`` via ``set_raw``.  Malformed input is recorded as
    defects on the message rather than raised.
    """
    # Passed a list of lines that make up the headers for the current msg
    lastheader = ''      # field name of the header currently being collected
    lastvalue = []       # raw source lines (first line + continuations)
    for lineno, line in enumerate(lines):
        # Check for continuation
        if line[0] in ' \t':
            if not lastheader:
                # The first line of the headers was a continuation.  This
                # is illegal, so let's note the defect, store the illegal
                # line, and ignore it for purposes of headers.
                defect = errors.FirstHeaderLineIsContinuationDefect(line)
                self.policy.handle_defect(self._cur, defect)
                continue
            lastvalue.append(line)
            continue
        if lastheader:
            # A new header starts here, so flush the one we were collecting.
            self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
            lastheader, lastvalue = '', []
        # Check for envelope header, i.e. unix-from
        if line.startswith('From '):
            if lineno == 0:
                # Strip off the trailing newline
                mo = NLCRE_eol.search(line)
                if mo:
                    line = line[:-len(mo.group(0))]
                self._cur.set_unixfrom(line)
                continue
            elif lineno == len(lines) - 1:
                # Something looking like a unix-from at the end - it's
                # probably the first line of the body, so push back the
                # line and stop.
                self._input.unreadline(line)
                return
            else:
                # Weirdly placed unix-from line.  Note this as a defect
                # and ignore it.
                defect = errors.MisplacedEnvelopeHeaderDefect(line)
                self._cur.defects.append(defect)
                continue
        # Split the line on the colon separating field name from value.
        # There will always be a colon, because if there wasn't the part of
        # the parser that calls us would have started parsing the body.
        i = line.find(':')
        assert i>0, "_parse_headers fed line with no : and no leading WS"
        lastheader = line[:i]
        lastvalue = [line]
    # Done with all the lines, so handle the last header.
    if lastheader:
        self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
|
||||
|
||||
|
||||
class BytesFeedParser(FeedParser):
    """A FeedParser whose feed() method accepts bytes instead of str."""

    def feed(self, data):
        # Decode with surrogateescape so arbitrary 8-bit bytes survive the
        # round trip through the str-based parser machinery unchanged.
        text = data.decode('ascii', errors='surrogateescape')
        super().feed(text)
|
|
@ -0,0 +1,583 @@
|
|||
# Copyright (C) 2002-2007 Python Software Foundation
|
||||
# Author: Ben Gertzfield, Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Header encoding and decoding functionality."""
|
||||
|
||||
__all__ = [
|
||||
'Header',
|
||||
'decode_header',
|
||||
'make_header',
|
||||
]
|
||||
|
||||
import re
|
||||
import binascii
|
||||
|
||||
import email.quoprimime
|
||||
import email.base64mime
|
||||
|
||||
from email.errors import HeaderParseError
|
||||
from email import charset as _charset
|
||||
Charset = _charset.Charset
|
||||
|
||||
NL = '\n'
|
||||
SPACE = ' '
|
||||
BSPACE = b' '
|
||||
SPACE8 = ' ' * 8
|
||||
EMPTYSTRING = ''
|
||||
MAXLINELEN = 78
|
||||
FWS = ' \t'
|
||||
|
||||
USASCII = Charset('us-ascii')
|
||||
UTF8 = Charset('utf-8')
|
||||
|
||||
# Match encoded-word strings in the form =?charset?q?Hello_World?=
|
||||
ecre = re.compile(r'''
|
||||
=\? # literal =?
|
||||
(?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
|
||||
\? # literal ?
|
||||
(?P<encoding>[qb]) # either a "q" or a "b", case insensitive
|
||||
\? # literal ?
|
||||
(?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string
|
||||
\?= # literal ?=
|
||||
''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)
|
||||
|
||||
# Field name regexp, including trailing colon, but not separating whitespace,
|
||||
# according to RFC 2822. Character range is from tilde to exclamation mark.
|
||||
# For use with .match()
|
||||
fcre = re.compile(r'[\041-\176]+:$')
|
||||
|
||||
# Find a header embedded in a putative header value. Used to check for
|
||||
# header injection attack.
|
||||
_embeded_header = re.compile(r'\n[^ \t]+:')
|
||||
|
||||
|
||||
|
||||
# Helpers
|
||||
_max_append = email.quoprimime._max_append
|
||||
|
||||
|
||||
|
||||
def decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (string, charset) pairs containing each of the decoded
    parts of the header.  Charset is None for non-encoded parts of the header,
    otherwise a lower-case string containing the name of the character set
    specified in the encoded string.

    header may be a string that may or may not contain RFC2047 encoded words,
    or it may be a Header object.

    An email.errors.HeaderParseError may be raised when certain decoding error
    occurs (e.g. a base64 decoding exception).
    """
    # If it is a Header object, we can just return the encoded chunks.
    if hasattr(header, '_chunks'):
        return [(_charset._encode(string, str(charset)), str(charset))
                    for string, charset in header._chunks]
    # If no encoding, just return the header with no charset.
    if not ecre.search(header):
        return [(header, None)]
    # First step is to parse all the encoded parts into triplets of the form
    # (encoded_string, encoding, charset).  For unencoded strings, the last
    # two parts will be None.
    words = []
    for line in header.splitlines():
        parts = ecre.split(line)
        first = True
        while parts:
            unencoded = parts.pop(0)
            if first:
                unencoded = unencoded.lstrip()
                first = False
            if unencoded:
                words.append((unencoded, None, None))
            if parts:
                charset = parts.pop(0).lower()
                encoding = parts.pop(0).lower()
                encoded = parts.pop(0)
                words.append((encoded, encoding, charset))
    # Now loop over words and remove words that consist of whitespace
    # between two encoded strings.
    # (A leftover debugging "import sys" that previously sat here has been
    # removed; sys was never used in this function.)
    droplist = []
    for n, w in enumerate(words):
        if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace():
            droplist.append(n-1)
    for d in reversed(droplist):
        del words[d]

    # The next step is to decode each encoded word by applying the reverse
    # base64 or quopri transformation.  decoded_words is now a list of the
    # form (decoded_word, charset).
    decoded_words = []
    for encoded_string, encoding, charset in words:
        if encoding is None:
            # This is an unencoded word.
            decoded_words.append((encoded_string, charset))
        elif encoding == 'q':
            word = email.quoprimime.header_decode(encoded_string)
            decoded_words.append((word, charset))
        elif encoding == 'b':
            paderr = len(encoded_string) % 4   # Postel's law: add missing padding
            if paderr:
                encoded_string += '==='[:4 - paderr]
            try:
                word = email.base64mime.decode(encoded_string)
            except binascii.Error:
                raise HeaderParseError('Base64 decoding error')
            else:
                decoded_words.append((word, charset))
        else:
            raise AssertionError('Unexpected encoding: ' + encoding)
    # Now convert all words to bytes and collapse consecutive runs of
    # similarly encoded words.
    collapsed = []
    last_word = last_charset = None
    for word, charset in decoded_words:
        if isinstance(word, str):
            word = bytes(word, 'raw-unicode-escape')
        if last_word is None:
            last_word = word
            last_charset = charset
        elif charset != last_charset:
            collapsed.append((last_word, last_charset))
            last_word = word
            last_charset = charset
        elif last_charset is None:
            last_word += BSPACE + word
        else:
            last_word += word
    collapsed.append((last_word, last_charset))
    return collapsed
|
||||
|
||||
|
||||
|
||||
def make_header(decoded_seq, maxlinelen=None, header_name=None,
                continuation_ws=' '):
    """Build a Header from (decoded_string, charset) pairs.

    The pairs are in the format produced by decode_header(): each charset is
    either None (meaning us-ascii) or the string name of a character set.
    The optional maxlinelen, header_name, and continuation_ws arguments have
    the same meaning as in the Header constructor.
    """
    header = Header(maxlinelen=maxlinelen, header_name=header_name,
                    continuation_ws=continuation_ws)
    for string, charset in decoded_seq:
        # A None charset simply passes through; Header.append treats it as
        # us-ascii.  Anything else is coerced to a Charset instance.
        if charset is not None and not isinstance(charset, Charset):
            charset = Charset(charset)
        header.append(string, charset)
    return header
|
||||
|
||||
|
||||
|
||||
class Header:
    def __init__(self, s=None, charset=None,
                 maxlinelen=None, header_name=None,
                 continuation_ws=' ', errors='strict'):
        """Create a MIME-compliant header that can contain many character sets.

        Optional s is the initial header value.  If None, the initial header
        value is not set.  You can later append to the header with .append()
        method calls.  s may be a byte string or a Unicode string, but see the
        .append() documentation for semantics.

        Optional charset serves two purposes: it has the same meaning as the
        charset argument to the .append() method.  It also sets the default
        character set for all subsequent .append() calls that omit the charset
        argument.  If charset is not provided in the constructor, the us-ascii
        charset is used both as s's initial charset and as the default for
        subsequent .append() calls.

        The maximum line length can be specified explicitly via maxlinelen. For
        splitting the first line to a shorter value (to account for the field
        header which isn't included in s, e.g. `Subject') pass in the name of
        the field in header_name.  The default maxlinelen is 78 as recommended
        by RFC 2822.

        continuation_ws must be RFC 2822 compliant folding whitespace (usually
        either a space or a hard tab) which will be prepended to continuation
        lines.

        errors is passed through to the .append() call.
        """
        if charset is None:
            charset = USASCII
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        self._continuation_ws = continuation_ws
        self._chunks = []
        if s is not None:
            self.append(s, charset, errors)
        if maxlinelen is None:
            maxlinelen = MAXLINELEN
        self._maxlinelen = maxlinelen
        if header_name is None:
            self._headerlen = 0
        else:
            # Take the separating colon and space into account.
            self._headerlen = len(header_name) + 2

    def __str__(self):
        """Return the string value of the header."""
        self._normalize()
        uchunks = []
        lastcs = None
        lastspace = None
        for string, charset in self._chunks:
            # We must preserve spaces between encoded and non-encoded word
            # boundaries, which means for us we need to add a space when we go
            # from a charset to None/us-ascii, or from None/us-ascii to a
            # charset.  Only do this for the second and subsequent chunks.
            # Don't add a space if the None/us-ascii string already has
            # a space (trailing or leading depending on transition)
            nextcs = charset
            if nextcs == _charset.UNKNOWN8BIT:
                original_bytes = string.encode('ascii', 'surrogateescape')
                string = original_bytes.decode('ascii', 'replace')
            if uchunks:
                hasspace = string and self._nonctext(string[0])
                if lastcs not in (None, 'us-ascii'):
                    if nextcs in (None, 'us-ascii') and not hasspace:
                        uchunks.append(SPACE)
                        nextcs = None
                elif nextcs not in (None, 'us-ascii') and not lastspace:
                    uchunks.append(SPACE)
            lastspace = string and self._nonctext(string[-1])
            lastcs = nextcs
            uchunks.append(string)
        return EMPTYSTRING.join(uchunks)

    # Rich comparison operators for equality only.  BAW: does it make sense to
    # have or explicitly disable <, <=, >, >= operators?
    def __eq__(self, other):
        # other may be a Header or a string.  Both are fine so coerce
        # ourselves to a unicode (of the unencoded header value), swap the
        # args and do another comparison.
        return other == str(self)

    def __ne__(self, other):
        return not self == other

    def append(self, s, charset=None, errors='strict'):
        """Append a string to the MIME header.

        Optional charset, if given, should be a Charset instance or the name
        of a character set (which will be converted to a Charset instance).  A
        value of None (the default) means that the charset given in the
        constructor is used.

        s may be a byte string or a Unicode string.  If it is a byte string
        (i.e. isinstance(s, str) is false), then charset is the encoding of
        that byte string, and a UnicodeError will be raised if the string
        cannot be decoded with that charset.  If s is a Unicode string, then
        charset is a hint specifying the character set of the characters in
        the string.  In either case, when producing an RFC 2822 compliant
        header using RFC 2047 rules, the string will be encoded using the
        output codec of the charset.  If the string cannot be encoded to the
        output codec, a UnicodeError will be raised.

        Optional `errors' is passed as the errors argument to the decode
        call if s is a byte string.
        """
        if charset is None:
            charset = self._charset
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        if not isinstance(s, str):
            input_charset = charset.input_codec or 'us-ascii'
            if input_charset == _charset.UNKNOWN8BIT:
                s = s.decode('us-ascii', 'surrogateescape')
            else:
                s = s.decode(input_charset, errors)
        # Ensure that the bytes we're storing can be decoded to the output
        # character set, otherwise an early error is raised.
        output_charset = charset.output_codec or 'us-ascii'
        if output_charset != _charset.UNKNOWN8BIT:
            try:
                s.encode(output_charset, errors)
            except UnicodeEncodeError:
                if output_charset != 'us-ascii':
                    raise
                # Whatever we were told, us-ascii can't represent this
                # string; fall back to utf-8.
                charset = UTF8
        self._chunks.append((s, charset))

    def _nonctext(self, s):
        """True if string s is not a ctext character of RFC822.
        """
        return s in (' ', '\t', '(', ')', '\\')

    def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
        r"""Encode a message header into an RFC-compliant format.

        There are many issues involved in converting a given string for use in
        an email header.  Only certain character sets are readable in most
        email clients, and as header strings can only contain a subset of
        7-bit ASCII, care must be taken to properly convert and encode (with
        Base64 or quoted-printable) header strings.  In addition, there is a
        75-character length limit on any given encoded header field, so
        line-wrapping must be performed, even with double-byte character sets.

        Optional maxlinelen specifies the maximum length of each generated
        line, exclusive of the linesep string.  Individual lines may be longer
        than maxlinelen if a folding point cannot be found.  The first line
        will be shorter by the length of the header name plus ": " if a header
        name was specified at Header construction time.  The default value for
        maxlinelen is determined at header construction time.

        Optional splitchars is a string containing characters which should be
        given extra weight by the splitting algorithm during normal header
        wrapping.  This is in very rough support of RFC 2822's `higher level
        syntactic breaks':  split points preceded by a splitchar are preferred
        during line splitting, with the characters preferred in the order in
        which they appear in the string.  Space and tab may be included in the
        string to indicate whether preference should be given to one over the
        other as a split point when other split chars do not appear in the line
        being split.  Splitchars does not affect RFC 2047 encoded lines.

        Optional linesep is a string to be used to separate the lines of
        the value.  The default value is the most useful for typical
        Python applications, but it can be set to \r\n to produce RFC-compliant
        line separators when needed.
        """
        self._normalize()
        if maxlinelen is None:
            maxlinelen = self._maxlinelen
        # A maxlinelen of 0 means don't wrap.  For all practical purposes,
        # choosing a huge number here accomplishes that and makes the
        # _ValueFormatter algorithm much simpler.
        if maxlinelen == 0:
            maxlinelen = 1000000
        formatter = _ValueFormatter(self._headerlen, maxlinelen,
                                    self._continuation_ws, splitchars)
        lastcs = None
        hasspace = lastspace = None
        for string, charset in self._chunks:
            if hasspace is not None:
                hasspace = string and self._nonctext(string[0])
            # NOTE: a stray debugging "import sys" was removed here; it was
            # unused and executed on every encode() call.
            if lastcs not in (None, 'us-ascii'):
                if not hasspace or charset not in (None, 'us-ascii'):
                    formatter.add_transition()
            elif charset not in (None, 'us-ascii') and not lastspace:
                formatter.add_transition()
            lastspace = string and self._nonctext(string[-1])
            lastcs = charset
            hasspace = False
            lines = string.splitlines()
            if lines:
                formatter.feed('', lines[0], charset)
            else:
                formatter.feed('', '', charset)
            for line in lines[1:]:
                formatter.newline()
                if charset.header_encoding is not None:
                    formatter.feed(self._continuation_ws, ' ' + line.lstrip(),
                                   charset)
                else:
                    sline = line.lstrip()
                    fws = line[:len(line)-len(sline)]
                    formatter.feed(fws, sline, charset)
            if len(lines) > 1:
                formatter.newline()
        if self._chunks:
            formatter.add_transition()
        value = formatter._str(linesep)
        if _embeded_header.search(value):
            raise HeaderParseError("header value appears to contain "
                                   "an embedded header: {!r}".format(value))
        return value

    def _normalize(self):
        # Step 1: Normalize the chunks so that all runs of identical charsets
        # get collapsed into a single unicode string.
        chunks = []
        last_charset = None
        last_chunk = []
        for string, charset in self._chunks:
            if charset == last_charset:
                last_chunk.append(string)
            else:
                if last_charset is not None:
                    chunks.append((SPACE.join(last_chunk), last_charset))
                last_chunk = [string]
                last_charset = charset
        if last_chunk:
            chunks.append((SPACE.join(last_chunk), last_charset))
        self._chunks = chunks
|
||||
|
||||
|
||||
|
||||
class _ValueFormatter:
    """Accumulates header value chunks and folds them into wrapped lines.

    Used by Header.encode(): chunks are fed in one at a time, and the
    finished, folded value is retrieved with _str(linesep).
    """

    def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
        # headerlen: length of "Name: " already occupying the first line.
        # maxlen: maximum rendered length for each line.
        # continuation_ws: folding whitespace prepended to continuation lines.
        # splitchars: characters preferred (in order) as fold points.
        self._maxlen = maxlen
        self._continuation_ws = continuation_ws
        self._continuation_ws_len = len(continuation_ws)
        self._splitchars = splitchars
        self._lines = []
        self._current_line = _Accumulator(headerlen)

    def _str(self, linesep):
        """Flush the pending line and return all lines joined by linesep."""
        self.newline()
        return linesep.join(self._lines)

    def __str__(self):
        """Return the folded value using the module-default line separator."""
        return self._str(NL)

    def newline(self):
        """Terminate the current line and start accumulating a new one."""
        end_of_line = self._current_line.pop()
        # A trailing (' ', '') pair is a bare transition marker; drop it so
        # it doesn't leave dangling whitespace at the end of a line.
        if end_of_line != (' ', ''):
            self._current_line.push(*end_of_line)
        if len(self._current_line) > 0:
            if self._current_line.is_onlyws():
                # Pure whitespace belongs to the previous line, not a new one.
                self._lines[-1] += str(self._current_line)
            else:
                self._lines.append(str(self._current_line))
        self._current_line.reset()

    def add_transition(self):
        """Record a potential fold point between adjacent chunks."""
        self._current_line.push(' ', '')

    def feed(self, fws, string, charset):
        """Feed one chunk (preceded by folding whitespace fws) for wrapping."""
        # If the charset has no header encoding (i.e. it is an ASCII encoding)
        # then we must split the header at the "highest level syntactic break"
        # possible.  Note that we don't have a lot of smarts about field
        # syntax; we just try to break on semi-colons, then commas, then
        # whitespace.  Eventually, this should be pluggable.
        if charset.header_encoding is None:
            self._ascii_split(fws, string, self._splitchars)
            return
        # Otherwise, we're doing either a Base64 or a quoted-printable
        # encoding which means we don't need to split the line on syntactic
        # breaks.  We can basically just find enough characters to fit on the
        # current line, minus the RFC 2047 chrome.  What makes this trickier
        # though is that we have to split at octet boundaries, not character
        # boundaries but it's only safe to split at character boundaries so at
        # best we can only get close.
        encoded_lines = charset.header_encode_lines(string, self._maxlengths())
        # The first element extends the current line, but if it's None then
        # nothing more fit on the current line so start a new line.
        try:
            first_line = encoded_lines.pop(0)
        except IndexError:
            # There are no encoded lines, so we're done.
            return
        if first_line is not None:
            self._append_chunk(fws, first_line)
        try:
            last_line = encoded_lines.pop()
        except IndexError:
            # There was only one line.
            return
        self.newline()
        self._current_line.push(self._continuation_ws, last_line)
        # Everything else are full lines in themselves.
        for line in encoded_lines:
            self._lines.append(self._continuation_ws + line)

    def _maxlengths(self):
        """Yield the space available on each successive output line."""
        # The first line's length.
        yield self._maxlen - len(self._current_line)
        while True:
            yield self._maxlen - self._continuation_ws_len

    def _ascii_split(self, fws, string, splitchars):
        """Split an unencoded chunk on folding whitespace and feed the parts."""
        # The RFC 2822 header folding algorithm is simple in principle but
        # complex in practice.  Lines may be folded any place where "folding
        # white space" appears by inserting a linesep character in front of the
        # FWS.  The complication is that not all spaces or tabs qualify as FWS,
        # and we are also supposed to prefer to break at "higher level
        # syntactic breaks".  We can't do either of these without intimate
        # knowledge of the structure of structured headers, which we don't have
        # here.  So the best we can do here is prefer to break at the specified
        # splitchars, and hope that we don't choose any spaces or tabs that
        # aren't legal FWS.  (This is at least better than the old algorithm,
        # where we would sometimes *introduce* FWS after a splitchar, or the
        # algorithm before that, where we would turn all white space runs into
        # single spaces or tabs.)
        parts = re.split("(["+FWS+"]+)", fws+string)
        # Normalize so that parts alternates (whitespace, word) pairs.
        if parts[0]:
            parts[:0] = ['']
        else:
            parts.pop(0)
        for fws, part in zip(*[iter(parts)]*2):
            self._append_chunk(fws, part)

    def _append_chunk(self, fws, string):
        """Add a (fws, string) pair, folding the line if it grows too long."""
        self._current_line.push(fws, string)
        if len(self._current_line) > self._maxlen:
            # Find the best split point, working backward from the end.
            # There might be none, on a long first line.
            for ch in self._splitchars:
                for i in range(self._current_line.part_count()-1, 0, -1):
                    if ch.isspace():
                        fws = self._current_line[i][0]
                        if fws and fws[0]==ch:
                            break
                    prevpart = self._current_line[i-1][1]
                    if prevpart and prevpart[-1]==ch:
                        break
                else:
                    continue
                break
            else:
                # No acceptable split point found for any splitchar.
                fws, part = self._current_line.pop()
                if self._current_line._initial_size > 0:
                    # There will be a header, so leave it on a line by itself.
                    self.newline()
                    if not fws:
                        # We don't use continuation_ws here because the whitespace
                        # after a header should always be a space.
                        fws = ' '
                self._current_line.push(fws, part)
                return
            remainder = self._current_line.pop_from(i)
            self._lines.append(str(self._current_line))
            self._current_line.reset(remainder)
|
||||
|
||||
|
||||
class _Accumulator(list):
|
||||
|
||||
def __init__(self, initial_size=0):
|
||||
self._initial_size = initial_size
|
||||
super().__init__()
|
||||
|
||||
def push(self, fws, string):
|
||||
self.append((fws, string))
|
||||
|
||||
def pop_from(self, i=0):
|
||||
popped = self[i:]
|
||||
self[i:] = []
|
||||
return popped
|
||||
|
||||
def pop(self):
|
||||
if self.part_count()==0:
|
||||
return ('', '')
|
||||
return super().pop()
|
||||
|
||||
def __len__(self):
|
||||
return sum((len(fws)+len(part) for fws, part in self),
|
||||
self._initial_size)
|
||||
|
||||
def __str__(self):
|
||||
return EMPTYSTRING.join((EMPTYSTRING.join((fws, part))
|
||||
for fws, part in self))
|
||||
|
||||
def reset(self, startval=None):
|
||||
if startval is None:
|
||||
startval = []
|
||||
self[:] = startval
|
||||
self._initial_size = 0
|
||||
|
||||
def is_onlyws(self):
|
||||
return self._initial_size==0 and (not self or str(self).isspace())
|
||||
|
||||
def part_count(self):
|
||||
return super().__len__()
|
|
@ -0,0 +1,73 @@
|
|||
# Copyright (C) 2001-2006 Python Software Foundation
|
||||
# Author: Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Various types of useful iterators and generators."""
|
||||
|
||||
__all__ = [
|
||||
'body_line_iterator',
|
||||
'typed_subpart_iterator',
|
||||
'walk',
|
||||
# Do not include _structure() since it's part of the debugging API.
|
||||
]
|
||||
|
||||
import sys
|
||||
from io import StringIO
|
||||
|
||||
|
||||
|
||||
# This function will become a method of the Message class
|
||||
# This function will become a method of the Message class
def walk(self):
    """Yield this message and then every subpart, depth-first.

    This method is a generator.
    """
    yield self
    if not self.is_multipart():
        return
    for child in self.get_payload():
        yield from child.walk()
|
||||
|
||||
|
||||
|
||||
# These two functions are imported into the Iterators.py interface module.
|
||||
def body_line_iterator(msg, decode=False):
    """Iterate over the string payloads of msg's parts, line by line.

    Optional decode (default False) is passed through to .get_payload().
    Non-string payloads (e.g. multipart containers) are skipped.
    """
    for part in msg.walk():
        body = part.get_payload(decode=decode)
        if not isinstance(body, str):
            continue
        yield from StringIO(body)
|
||||
|
||||
|
||||
def typed_subpart_iterator(msg, maintype='text', subtype=None):
    """Iterate over the subparts of msg with a matching MIME type.

    `maintype' is the main MIME type to match against (default "text").
    Optional `subtype' is the MIME subtype to match against; if omitted,
    only the main type is matched.
    """
    for part in msg.walk():
        if part.get_content_maintype() != maintype:
            continue
        if subtype is None or part.get_content_subtype() == subtype:
            yield part
|
||||
|
||||
|
||||
|
||||
def _structure(msg, fp=None, level=0, include_default=False):
|
||||
"""A handy debugging aid"""
|
||||
if fp is None:
|
||||
fp = sys.stdout
|
||||
tab = ' ' * (level * 4)
|
||||
print(tab + msg.get_content_type(), end='', file=fp)
|
||||
if include_default:
|
||||
print(' [%s]' % msg.get_default_type(), file=fp)
|
||||
else:
|
||||
print(file=fp)
|
||||
if msg.is_multipart():
|
||||
for subpart in msg.get_payload():
|
||||
_structure(subpart, fp, level+1, include_default)
|
|
@ -0,0 +1,879 @@
|
|||
# Copyright (C) 2001-2007 Python Software Foundation
|
||||
# Author: Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Basic message object for the email package object model."""
|
||||
|
||||
__all__ = ['Message']
|
||||
|
||||
import re
|
||||
import uu
|
||||
import base64
|
||||
import binascii
|
||||
from io import BytesIO, StringIO
|
||||
|
||||
# Intrapackage imports
|
||||
from email import utils
|
||||
from email import errors
|
||||
from email._policybase import compat32
|
||||
from email import charset as _charset
|
||||
from email._encoded_words import decode_b
|
||||
Charset = _charset.Charset
|
||||
|
||||
SEMISPACE = '; '
|
||||
|
||||
# Regular expression that matches `special' characters in parameters, the
|
||||
# existence of which force quoting of the parameter value.
|
||||
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
|
||||
|
||||
|
||||
def _splitparam(param):
|
||||
# Split header parameters. BAW: this may be too simple. It isn't
|
||||
# strictly RFC 2045 (section 5.1) compliant, but it catches most headers
|
||||
# found in the wild. We may eventually need a full fledged parser.
|
||||
# RDM: we might have a Header here; for now just stringify it.
|
||||
a, sep, b = str(param).partition(';')
|
||||
if not sep:
|
||||
return a.strip(), None
|
||||
return a.strip(), b.strip()
|
||||
|
||||
def _formatparam(param, value=None, quote=True):
|
||||
"""Convenience function to format and return a key=value pair.
|
||||
|
||||
This will quote the value if needed or if quote is true. If value is a
|
||||
three tuple (charset, language, value), it will be encoded according
|
||||
to RFC2231 rules. If it contains non-ascii characters it will likewise
|
||||
be encoded according to RFC2231 rules, using the utf-8 charset and
|
||||
a null language.
|
||||
"""
|
||||
if value is not None and len(value) > 0:
|
||||
# A tuple is used for RFC 2231 encoded parameter values where items
|
||||
# are (charset, language, value). charset is a string, not a Charset
|
||||
# instance. RFC 2231 encoded values are never quoted, per RFC.
|
||||
if isinstance(value, tuple):
|
||||
# Encode as per RFC 2231
|
||||
param += '*'
|
||||
value = utils.encode_rfc2231(value[2], value[0], value[1])
|
||||
return '%s=%s' % (param, value)
|
||||
else:
|
||||
try:
|
||||
value.encode('ascii')
|
||||
except UnicodeEncodeError:
|
||||
param += '*'
|
||||
value = utils.encode_rfc2231(value, 'utf-8', '')
|
||||
return '%s=%s' % (param, value)
|
||||
# BAW: Please check this. I think that if quote is set it should
|
||||
# force quoting even if not necessary.
|
||||
if quote or tspecials.search(value):
|
||||
return '%s="%s"' % (param, utils.quote(value))
|
||||
else:
|
||||
return '%s=%s' % (param, value)
|
||||
else:
|
||||
return param
|
||||
|
||||
def _parseparam(s):
|
||||
# RDM This might be a Header, so for now stringify it.
|
||||
s = ';' + str(s)
|
||||
plist = []
|
||||
while s[:1] == ';':
|
||||
s = s[1:]
|
||||
end = s.find(';')
|
||||
while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
|
||||
end = s.find(';', end + 1)
|
||||
if end < 0:
|
||||
end = len(s)
|
||||
f = s[:end]
|
||||
if '=' in f:
|
||||
i = f.index('=')
|
||||
f = f[:i].strip().lower() + '=' + f[i+1:].strip()
|
||||
plist.append(f.strip())
|
||||
s = s[end:]
|
||||
return plist
|
||||
|
||||
|
||||
def _unquotevalue(value):
|
||||
# This is different than utils.collapse_rfc2231_value() because it doesn't
|
||||
# try to convert the value to a unicode. Message.get_param() and
|
||||
# Message.get_params() are both currently defined to return the tuple in
|
||||
# the face of RFC 2231 parameters.
|
||||
if isinstance(value, tuple):
|
||||
return value[0], value[1], utils.unquote(value[2])
|
||||
else:
|
||||
return utils.unquote(value)
|
||||
|
||||
|
||||
|
||||
class Message:
|
||||
"""Basic message object.
|
||||
|
||||
A message object is defined as something that has a bunch of RFC 2822
|
||||
headers and a payload. It may optionally have an envelope header
|
||||
(a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
|
||||
multipart or a message/rfc822), then the payload is a list of Message
|
||||
objects, otherwise it is a string.
|
||||
|
||||
Message objects implement part of the `mapping' interface, which assumes
|
||||
there is exactly one occurrence of the header per message. Some headers
|
||||
do in fact appear multiple times (e.g. Received) and for those headers,
|
||||
you must use the explicit API to set or get all the headers. Not all of
|
||||
the mapping methods are implemented.
|
||||
"""
|
||||
def __init__(self, policy=compat32):
|
||||
self.policy = policy
|
||||
self._headers = []
|
||||
self._unixfrom = None
|
||||
self._payload = None
|
||||
self._charset = None
|
||||
# Defaults for multipart messages
|
||||
self.preamble = self.epilogue = None
|
||||
self.defects = []
|
||||
# Default content type
|
||||
self._default_type = 'text/plain'
|
||||
|
||||
def __str__(self):
|
||||
"""Return the entire formatted message as a string.
|
||||
This includes the headers, body, and envelope header.
|
||||
"""
|
||||
return self.as_string()
|
||||
|
||||
def as_string(self, unixfrom=False, maxheaderlen=0):
|
||||
"""Return the entire formatted message as a string.
|
||||
Optional `unixfrom' when True, means include the Unix From_ envelope
|
||||
header.
|
||||
|
||||
This is a convenience method and may not generate the message exactly
|
||||
as you intend. For more flexibility, use the flatten() method of a
|
||||
Generator instance.
|
||||
"""
|
||||
from email.generator import Generator
|
||||
fp = StringIO()
|
||||
g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
|
||||
g.flatten(self, unixfrom=unixfrom)
|
||||
return fp.getvalue()
|
||||
|
||||
def is_multipart(self):
|
||||
"""Return True if the message consists of multiple parts."""
|
||||
return isinstance(self._payload, list)
|
||||
|
||||
#
|
||||
# Unix From_ line
|
||||
#
|
||||
def set_unixfrom(self, unixfrom):
|
||||
self._unixfrom = unixfrom
|
||||
|
||||
def get_unixfrom(self):
|
||||
return self._unixfrom
|
||||
|
||||
#
|
||||
# Payload manipulation.
|
||||
#
|
||||
def attach(self, payload):
|
||||
"""Add the given payload to the current payload.
|
||||
|
||||
The current payload will always be a list of objects after this method
|
||||
is called. If you want to set the payload to a scalar object, use
|
||||
set_payload() instead.
|
||||
"""
|
||||
if self._payload is None:
|
||||
self._payload = [payload]
|
||||
else:
|
||||
self._payload.append(payload)
|
||||
|
||||
def get_payload(self, i=None, decode=False):
|
||||
"""Return a reference to the payload.
|
||||
|
||||
The payload will either be a list object or a string. If you mutate
|
||||
the list object, you modify the message's payload in place. Optional
|
||||
i returns that index into the payload.
|
||||
|
||||
Optional decode is a flag indicating whether the payload should be
|
||||
decoded or not, according to the Content-Transfer-Encoding header
|
||||
(default is False).
|
||||
|
||||
When True and the message is not a multipart, the payload will be
|
||||
decoded if this header's value is `quoted-printable' or `base64'. If
|
||||
some other encoding is used, or the header is missing, or if the
|
||||
payload has bogus data (i.e. bogus base64 or uuencoded data), the
|
||||
payload is returned as-is.
|
||||
|
||||
If the message is a multipart and the decode flag is True, then None
|
||||
is returned.
|
||||
"""
|
||||
# Here is the logic table for this code, based on the email5.0.0 code:
|
||||
# i decode is_multipart result
|
||||
# ------ ------ ------------ ------------------------------
|
||||
# None True True None
|
||||
# i True True None
|
||||
# None False True _payload (a list)
|
||||
# i False True _payload element i (a Message)
|
||||
# i False False error (not a list)
|
||||
# i True False error (not a list)
|
||||
# None False False _payload
|
||||
# None True False _payload decoded (bytes)
|
||||
# Note that Barry planned to factor out the 'decode' case, but that
|
||||
# isn't so easy now that we handle the 8 bit data, which needs to be
|
||||
# converted in both the decode and non-decode path.
|
||||
if self.is_multipart():
|
||||
if decode:
|
||||
return None
|
||||
if i is None:
|
||||
return self._payload
|
||||
else:
|
||||
return self._payload[i]
|
||||
# For backward compatibility, Use isinstance and this error message
|
||||
# instead of the more logical is_multipart test.
|
||||
if i is not None and not isinstance(self._payload, list):
|
||||
raise TypeError('Expected list, got %s' % type(self._payload))
|
||||
payload = self._payload
|
||||
# cte might be a Header, so for now stringify it.
|
||||
cte = str(self.get('content-transfer-encoding', '')).lower()
|
||||
# payload may be bytes here.
|
||||
if isinstance(payload, str):
|
||||
if utils._has_surrogates(payload):
|
||||
bpayload = payload.encode('ascii', 'surrogateescape')
|
||||
if not decode:
|
||||
try:
|
||||
payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
|
||||
except LookupError:
|
||||
payload = bpayload.decode('ascii', 'replace')
|
||||
elif decode:
|
||||
try:
|
||||
bpayload = payload.encode('ascii')
|
||||
except UnicodeError:
|
||||
# This won't happen for RFC compliant messages (messages
|
||||
# containing only ASCII codepoints in the unicode input).
|
||||
# If it does happen, turn the string into bytes in a way
|
||||
# guaranteed not to fail.
|
||||
bpayload = payload.encode('raw-unicode-escape')
|
||||
if not decode:
|
||||
return payload
|
||||
if cte == 'quoted-printable':
|
||||
return utils._qdecode(bpayload)
|
||||
elif cte == 'base64':
|
||||
# XXX: this is a bit of a hack; decode_b should probably be factored
|
||||
# out somewhere, but I haven't figured out where yet.
|
||||
value, defects = decode_b(b''.join(bpayload.splitlines()))
|
||||
for defect in defects:
|
||||
self.policy.handle_defect(self, defect)
|
||||
return value
|
||||
elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
|
||||
in_file = BytesIO(bpayload)
|
||||
out_file = BytesIO()
|
||||
try:
|
||||
uu.decode(in_file, out_file, quiet=True)
|
||||
return out_file.getvalue()
|
||||
except uu.Error:
|
||||
# Some decoding problem
|
||||
return bpayload
|
||||
if isinstance(payload, str):
|
||||
return bpayload
|
||||
return payload
|
||||
|
||||
def set_payload(self, payload, charset=None):
|
||||
"""Set the payload to the given value.
|
||||
|
||||
Optional charset sets the message's default character set. See
|
||||
set_charset() for details.
|
||||
"""
|
||||
if isinstance(payload, bytes):
|
||||
payload = payload.decode('ascii', 'surrogateescape')
|
||||
self._payload = payload
|
||||
if charset is not None:
|
||||
self.set_charset(charset)
|
||||
|
||||
def set_charset(self, charset):
|
||||
"""Set the charset of the payload to a given character set.
|
||||
|
||||
charset can be a Charset instance, a string naming a character set, or
|
||||
None. If it is a string it will be converted to a Charset instance.
|
||||
If charset is None, the charset parameter will be removed from the
|
||||
Content-Type field. Anything else will generate a TypeError.
|
||||
|
||||
The message will be assumed to be of type text/* encoded with
|
||||
charset.input_charset. It will be converted to charset.output_charset
|
||||
and encoded properly, if needed, when generating the plain text
|
||||
representation of the message. MIME headers (MIME-Version,
|
||||
Content-Type, Content-Transfer-Encoding) will be added as needed.
|
||||
"""
|
||||
if charset is None:
|
||||
self.del_param('charset')
|
||||
self._charset = None
|
||||
return
|
||||
if not isinstance(charset, Charset):
|
||||
charset = Charset(charset)
|
||||
self._charset = charset
|
||||
if 'MIME-Version' not in self:
|
||||
self.add_header('MIME-Version', '1.0')
|
||||
if 'Content-Type' not in self:
|
||||
self.add_header('Content-Type', 'text/plain',
|
||||
charset=charset.get_output_charset())
|
||||
else:
|
||||
self.set_param('charset', charset.get_output_charset())
|
||||
if charset != charset.get_output_charset():
|
||||
self._payload = charset.body_encode(self._payload)
|
||||
if 'Content-Transfer-Encoding' not in self:
|
||||
cte = charset.get_body_encoding()
|
||||
try:
|
||||
cte(self)
|
||||
except TypeError:
|
||||
self._payload = charset.body_encode(self._payload)
|
||||
self.add_header('Content-Transfer-Encoding', cte)
|
||||
|
||||
def get_charset(self):
|
||||
"""Return the Charset instance associated with the message's payload.
|
||||
"""
|
||||
return self._charset
|
||||
|
||||
#
|
||||
# MAPPING INTERFACE (partial)
|
||||
#
|
||||
def __len__(self):
|
||||
"""Return the total number of headers, including duplicates."""
|
||||
return len(self._headers)
|
||||
|
||||
def __getitem__(self, name):
|
||||
"""Get a header value.
|
||||
|
||||
Return None if the header is missing instead of raising an exception.
|
||||
|
||||
Note that if the header appeared multiple times, exactly which
|
||||
occurrence gets returned is undefined. Use get_all() to get all
|
||||
the values matching a header field name.
|
||||
"""
|
||||
return self.get(name)
|
||||
|
||||
def __setitem__(self, name, val):
|
||||
"""Set the value of a header.
|
||||
|
||||
Note: this does not overwrite an existing header with the same field
|
||||
name. Use __delitem__() first to delete any existing headers.
|
||||
"""
|
||||
max_count = self.policy.header_max_count(name)
|
||||
if max_count:
|
||||
lname = name.lower()
|
||||
found = 0
|
||||
for k, v in self._headers:
|
||||
if k.lower() == lname:
|
||||
found += 1
|
||||
if found >= max_count:
|
||||
raise ValueError("There may be at most {} {} headers "
|
||||
"in a message".format(max_count, name))
|
||||
self._headers.append(self.policy.header_store_parse(name, val))
|
||||
|
||||
def __delitem__(self, name):
|
||||
"""Delete all occurrences of a header, if present.
|
||||
|
||||
Does not raise an exception if the header is missing.
|
||||
"""
|
||||
name = name.lower()
|
||||
newheaders = []
|
||||
for k, v in self._headers:
|
||||
if k.lower() != name:
|
||||
newheaders.append((k, v))
|
||||
self._headers = newheaders
|
||||
|
||||
def __contains__(self, name):
|
||||
return name.lower() in [k.lower() for k, v in self._headers]
|
||||
|
||||
def __iter__(self):
|
||||
for field, value in self._headers:
|
||||
yield field
|
||||
|
||||
def keys(self):
|
||||
"""Return a list of all the message's header field names.
|
||||
|
||||
These will be sorted in the order they appeared in the original
|
||||
message, or were added to the message, and may contain duplicates.
|
||||
Any fields deleted and re-inserted are always appended to the header
|
||||
list.
|
||||
"""
|
||||
return [k for k, v in self._headers]
|
||||
|
||||
def values(self):
|
||||
"""Return a list of all the message's header values.
|
||||
|
||||
These will be sorted in the order they appeared in the original
|
||||
message, or were added to the message, and may contain duplicates.
|
||||
Any fields deleted and re-inserted are always appended to the header
|
||||
list.
|
||||
"""
|
||||
return [self.policy.header_fetch_parse(k, v)
|
||||
for k, v in self._headers]
|
||||
|
||||
def items(self):
|
||||
"""Get all the message's header fields and values.
|
||||
|
||||
These will be sorted in the order they appeared in the original
|
||||
message, or were added to the message, and may contain duplicates.
|
||||
Any fields deleted and re-inserted are always appended to the header
|
||||
list.
|
||||
"""
|
||||
return [(k, self.policy.header_fetch_parse(k, v))
|
||||
for k, v in self._headers]
|
||||
|
||||
def get(self, name, failobj=None):
|
||||
"""Get a header value.
|
||||
|
||||
Like __getitem__() but return failobj instead of None when the field
|
||||
is missing.
|
||||
"""
|
||||
name = name.lower()
|
||||
for k, v in self._headers:
|
||||
if k.lower() == name:
|
||||
return self.policy.header_fetch_parse(k, v)
|
||||
return failobj
|
||||
|
||||
#
|
||||
# "Internal" methods (public API, but only intended for use by a parser
|
||||
# or generator, not normal application code.
|
||||
#
|
||||
|
||||
def set_raw(self, name, value):
|
||||
"""Store name and value in the model without modification.
|
||||
|
||||
This is an "internal" API, intended only for use by a parser.
|
||||
"""
|
||||
self._headers.append((name, value))
|
||||
|
||||
def raw_items(self):
|
||||
"""Return the (name, value) header pairs without modification.
|
||||
|
||||
This is an "internal" API, intended only for use by a generator.
|
||||
"""
|
||||
return iter(self._headers.copy())
|
||||
|
||||
#
|
||||
# Additional useful stuff
|
||||
#
|
||||
|
||||
def get_all(self, name, failobj=None):
|
||||
"""Return a list of all the values for the named field.
|
||||
|
||||
These will be sorted in the order they appeared in the original
|
||||
message, and may contain duplicates. Any fields deleted and
|
||||
re-inserted are always appended to the header list.
|
||||
|
||||
If no such fields exist, failobj is returned (defaults to None).
|
||||
"""
|
||||
values = []
|
||||
name = name.lower()
|
||||
for k, v in self._headers:
|
||||
if k.lower() == name:
|
||||
values.append(self.policy.header_fetch_parse(k, v))
|
||||
if not values:
|
||||
return failobj
|
||||
return values
|
||||
|
||||
def add_header(self, _name, _value, **_params):
|
||||
"""Extended header setting.
|
||||
|
||||
name is the header field to add. keyword arguments can be used to set
|
||||
additional parameters for the header field, with underscores converted
|
||||
to dashes. Normally the parameter will be added as key="value" unless
|
||||
value is None, in which case only the key will be added. If a
|
||||
parameter value contains non-ASCII characters it can be specified as a
|
||||
three-tuple of (charset, language, value), in which case it will be
|
||||
encoded according to RFC2231 rules. Otherwise it will be encoded using
|
||||
the utf-8 charset and a language of ''.
|
||||
|
||||
Examples:
|
||||
|
||||
msg.add_header('content-disposition', 'attachment', filename='bud.gif')
|
||||
msg.add_header('content-disposition', 'attachment',
|
||||
filename=('utf-8', '', Fußballer.ppt'))
|
||||
msg.add_header('content-disposition', 'attachment',
|
||||
filename='Fußballer.ppt'))
|
||||
"""
|
||||
parts = []
|
||||
for k, v in _params.items():
|
||||
if v is None:
|
||||
parts.append(k.replace('_', '-'))
|
||||
else:
|
||||
parts.append(_formatparam(k.replace('_', '-'), v))
|
||||
if _value is not None:
|
||||
parts.insert(0, _value)
|
||||
self[_name] = SEMISPACE.join(parts)
|
||||
|
||||
def replace_header(self, _name, _value):
|
||||
"""Replace a header.
|
||||
|
||||
Replace the first matching header found in the message, retaining
|
||||
header order and case. If no matching header was found, a KeyError is
|
||||
raised.
|
||||
"""
|
||||
_name = _name.lower()
|
||||
for i, (k, v) in zip(range(len(self._headers)), self._headers):
|
||||
if k.lower() == _name:
|
||||
self._headers[i] = self.policy.header_store_parse(k, _value)
|
||||
break
|
||||
else:
|
||||
raise KeyError(_name)
|
||||
|
||||
#
|
||||
# Use these three methods instead of the three above.
|
||||
#
|
||||
|
||||
def get_content_type(self):
|
||||
"""Return the message's content type.
|
||||
|
||||
The returned string is coerced to lower case of the form
|
||||
`maintype/subtype'. If there was no Content-Type header in the
|
||||
message, the default type as given by get_default_type() will be
|
||||
returned. Since according to RFC 2045, messages always have a default
|
||||
type this will always return a value.
|
||||
|
||||
RFC 2045 defines a message's default type to be text/plain unless it
|
||||
appears inside a multipart/digest container, in which case it would be
|
||||
message/rfc822.
|
||||
"""
|
||||
missing = object()
|
||||
value = self.get('content-type', missing)
|
||||
if value is missing:
|
||||
# This should have no parameters
|
||||
return self.get_default_type()
|
||||
ctype = _splitparam(value)[0].lower()
|
||||
# RFC 2045, section 5.2 says if its invalid, use text/plain
|
||||
if ctype.count('/') != 1:
|
||||
return 'text/plain'
|
||||
return ctype
|
||||
|
||||
def get_content_maintype(self):
|
||||
"""Return the message's main content type.
|
||||
|
||||
This is the `maintype' part of the string returned by
|
||||
get_content_type().
|
||||
"""
|
||||
ctype = self.get_content_type()
|
||||
return ctype.split('/')[0]
|
||||
|
||||
def get_content_subtype(self):
|
||||
"""Returns the message's sub-content type.
|
||||
|
||||
This is the `subtype' part of the string returned by
|
||||
get_content_type().
|
||||
"""
|
||||
ctype = self.get_content_type()
|
||||
return ctype.split('/')[1]
|
||||
|
||||
def get_default_type(self):
|
||||
"""Return the `default' content type.
|
||||
|
||||
Most messages have a default content type of text/plain, except for
|
||||
messages that are subparts of multipart/digest containers. Such
|
||||
subparts have a default content type of message/rfc822.
|
||||
"""
|
||||
return self._default_type
|
||||
|
||||
def set_default_type(self, ctype):
|
||||
"""Set the `default' content type.
|
||||
|
||||
ctype should be either "text/plain" or "message/rfc822", although this
|
||||
is not enforced. The default content type is not stored in the
|
||||
Content-Type header.
|
||||
"""
|
||||
self._default_type = ctype
|
||||
|
||||
def _get_params_preserve(self, failobj, header):
|
||||
# Like get_params() but preserves the quoting of values. BAW:
|
||||
# should this be part of the public interface?
|
||||
missing = object()
|
||||
value = self.get(header, missing)
|
||||
if value is missing:
|
||||
return failobj
|
||||
params = []
|
||||
for p in _parseparam(value):
|
||||
try:
|
||||
name, val = p.split('=', 1)
|
||||
name = name.strip()
|
||||
val = val.strip()
|
||||
except ValueError:
|
||||
# Must have been a bare attribute
|
||||
name = p.strip()
|
||||
val = ''
|
||||
params.append((name, val))
|
||||
params = utils.decode_params(params)
|
||||
return params
|
||||
|
||||
def get_params(self, failobj=None, header='content-type', unquote=True):
|
||||
"""Return the message's Content-Type parameters, as a list.
|
||||
|
||||
The elements of the returned list are 2-tuples of key/value pairs, as
|
||||
split on the `=' sign. The left hand side of the `=' is the key,
|
||||
while the right hand side is the value. If there is no `=' sign in
|
||||
the parameter the value is the empty string. The value is as
|
||||
described in the get_param() method.
|
||||
|
||||
Optional failobj is the object to return if there is no Content-Type
|
||||
header. Optional header is the header to search instead of
|
||||
Content-Type. If unquote is True, the value is unquoted.
|
||||
"""
|
||||
missing = object()
|
||||
params = self._get_params_preserve(missing, header)
|
||||
if params is missing:
|
||||
return failobj
|
||||
if unquote:
|
||||
return [(k, _unquotevalue(v)) for k, v in params]
|
||||
else:
|
||||
return params
|
||||
|
||||
def get_param(self, param, failobj=None, header='content-type',
|
||||
unquote=True):
|
||||
"""Return the parameter value if found in the Content-Type header.
|
||||
|
||||
Optional failobj is the object to return if there is no Content-Type
|
||||
header, or the Content-Type header has no such parameter. Optional
|
||||
header is the header to search instead of Content-Type.
|
||||
|
||||
Parameter keys are always compared case insensitively. The return
|
||||
value can either be a string, or a 3-tuple if the parameter was RFC
|
||||
2231 encoded. When it's a 3-tuple, the elements of the value are of
|
||||
the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
|
||||
LANGUAGE can be None, in which case you should consider VALUE to be
|
||||
encoded in the us-ascii charset. You can usually ignore LANGUAGE.
|
||||
The parameter value (either the returned string, or the VALUE item in
|
||||
the 3-tuple) is always unquoted, unless unquote is set to False.
|
||||
|
||||
If your application doesn't care whether the parameter was RFC 2231
|
||||
encoded, it can turn the return value into a string as follows:
|
||||
|
||||
param = msg.get_param('foo')
|
||||
param = email.utils.collapse_rfc2231_value(rawparam)
|
||||
|
||||
"""
|
||||
if header not in self:
|
||||
return failobj
|
||||
for k, v in self._get_params_preserve(failobj, header):
|
||||
if k.lower() == param.lower():
|
||||
if unquote:
|
||||
return _unquotevalue(v)
|
||||
else:
|
||||
return v
|
||||
return failobj
|
||||
|
||||
def set_param(self, param, value, header='Content-Type', requote=True,
|
||||
charset=None, language=''):
|
||||
"""Set a parameter in the Content-Type header.
|
||||
|
||||
If the parameter already exists in the header, its value will be
|
||||
replaced with the new value.
|
||||
|
||||
If header is Content-Type and has not yet been defined for this
|
||||
message, it will be set to "text/plain" and the new parameter and
|
||||
value will be appended as per RFC 2045.
|
||||
|
||||
An alternate header can specified in the header argument, and all
|
||||
parameters will be quoted as necessary unless requote is False.
|
||||
|
||||
If charset is specified, the parameter will be encoded according to RFC
|
||||
2231. Optional language specifies the RFC 2231 language, defaulting
|
||||
to the empty string. Both charset and language should be strings.
|
||||
"""
|
||||
if not isinstance(value, tuple) and charset:
|
||||
value = (charset, language, value)
|
||||
|
||||
if header not in self and header.lower() == 'content-type':
|
||||
ctype = 'text/plain'
|
||||
else:
|
||||
ctype = self.get(header)
|
||||
if not self.get_param(param, header=header):
|
||||
if not ctype:
|
||||
ctype = _formatparam(param, value, requote)
|
||||
else:
|
||||
ctype = SEMISPACE.join(
|
||||
[ctype, _formatparam(param, value, requote)])
|
||||
else:
|
||||
ctype = ''
|
||||
for old_param, old_value in self.get_params(header=header,
|
||||
unquote=requote):
|
||||
append_param = ''
|
||||
if old_param.lower() == param.lower():
|
||||
append_param = _formatparam(param, value, requote)
|
||||
else:
|
||||
append_param = _formatparam(old_param, old_value, requote)
|
||||
if not ctype:
|
||||
ctype = append_param
|
||||
else:
|
||||
ctype = SEMISPACE.join([ctype, append_param])
|
||||
if ctype != self.get(header):
|
||||
del self[header]
|
||||
self[header] = ctype
|
||||
|
||||
def del_param(self, param, header='content-type', requote=True):
|
||||
"""Remove the given parameter completely from the Content-Type header.
|
||||
|
||||
The header will be re-written in place without the parameter or its
|
||||
value. All values will be quoted as necessary unless requote is
|
||||
False. Optional header specifies an alternative to the Content-Type
|
||||
header.
|
||||
"""
|
||||
if header not in self:
|
||||
return
|
||||
new_ctype = ''
|
||||
for p, v in self.get_params(header=header, unquote=requote):
|
||||
if p.lower() != param.lower():
|
||||
if not new_ctype:
|
||||
new_ctype = _formatparam(p, v, requote)
|
||||
else:
|
||||
new_ctype = SEMISPACE.join([new_ctype,
|
||||
_formatparam(p, v, requote)])
|
||||
if new_ctype != self.get(header):
|
||||
del self[header]
|
||||
self[header] = new_ctype
|
||||
|
||||
def set_type(self, type, header='Content-Type', requote=True):
|
||||
"""Set the main type and subtype for the Content-Type header.
|
||||
|
||||
type must be a string in the form "maintype/subtype", otherwise a
|
||||
ValueError is raised.
|
||||
|
||||
This method replaces the Content-Type header, keeping all the
|
||||
parameters in place. If requote is False, this leaves the existing
|
||||
header's quoting as is. Otherwise, the parameters will be quoted (the
|
||||
default).
|
||||
|
||||
An alternative header can be specified in the header argument. When
|
||||
the Content-Type header is set, we'll always also add a MIME-Version
|
||||
header.
|
||||
"""
|
||||
# BAW: should we be strict?
|
||||
if not type.count('/') == 1:
|
||||
raise ValueError
|
||||
# Set the Content-Type, you get a MIME-Version
|
||||
if header.lower() == 'content-type':
|
||||
del self['mime-version']
|
||||
self['MIME-Version'] = '1.0'
|
||||
if header not in self:
|
||||
self[header] = type
|
||||
return
|
||||
params = self.get_params(header=header, unquote=requote)
|
||||
del self[header]
|
||||
self[header] = type
|
||||
# Skip the first param; it's the old type.
|
||||
for p, v in params[1:]:
|
||||
self.set_param(p, v, header, requote)
|
||||
|
||||
def get_filename(self, failobj=None):
|
||||
"""Return the filename associated with the payload if present.
|
||||
|
||||
The filename is extracted from the Content-Disposition header's
|
||||
`filename' parameter, and it is unquoted. If that header is missing
|
||||
the `filename' parameter, this method falls back to looking for the
|
||||
`name' parameter.
|
||||
"""
|
||||
missing = object()
|
||||
filename = self.get_param('filename', missing, 'content-disposition')
|
||||
if filename is missing:
|
||||
filename = self.get_param('name', missing, 'content-type')
|
||||
if filename is missing:
|
||||
return failobj
|
||||
return utils.collapse_rfc2231_value(filename).strip()
|
||||
|
||||
def get_boundary(self, failobj=None):
|
||||
"""Return the boundary associated with the payload if present.
|
||||
|
||||
The boundary is extracted from the Content-Type header's `boundary'
|
||||
parameter, and it is unquoted.
|
||||
"""
|
||||
missing = object()
|
||||
boundary = self.get_param('boundary', missing)
|
||||
if boundary is missing:
|
||||
return failobj
|
||||
# RFC 2046 says that boundaries may begin but not end in w/s
|
||||
return utils.collapse_rfc2231_value(boundary).rstrip()
|
||||
|
||||
def set_boundary(self, boundary):
|
||||
"""Set the boundary parameter in Content-Type to 'boundary'.
|
||||
|
||||
This is subtly different than deleting the Content-Type header and
|
||||
adding a new one with a new boundary parameter via add_header(). The
|
||||
main difference is that using the set_boundary() method preserves the
|
||||
order of the Content-Type header in the original message.
|
||||
|
||||
HeaderParseError is raised if the message has no Content-Type header.
|
||||
"""
|
||||
missing = object()
|
||||
params = self._get_params_preserve(missing, 'content-type')
|
||||
if params is missing:
|
||||
# There was no Content-Type header, and we don't know what type
|
||||
# to set it to, so raise an exception.
|
||||
raise errors.HeaderParseError('No Content-Type header found')
|
||||
newparams = []
|
||||
foundp = False
|
||||
for pk, pv in params:
|
||||
if pk.lower() == 'boundary':
|
||||
newparams.append(('boundary', '"%s"' % boundary))
|
||||
foundp = True
|
||||
else:
|
||||
newparams.append((pk, pv))
|
||||
if not foundp:
|
||||
# The original Content-Type header had no boundary attribute.
|
||||
# Tack one on the end. BAW: should we raise an exception
|
||||
# instead???
|
||||
newparams.append(('boundary', '"%s"' % boundary))
|
||||
# Replace the existing Content-Type header with the new value
|
||||
newheaders = []
|
||||
for h, v in self._headers:
|
||||
if h.lower() == 'content-type':
|
||||
parts = []
|
||||
for k, v in newparams:
|
||||
if v == '':
|
||||
parts.append(k)
|
||||
else:
|
||||
parts.append('%s=%s' % (k, v))
|
||||
val = SEMISPACE.join(parts)
|
||||
newheaders.append(self.policy.header_store_parse(h, val))
|
||||
|
||||
else:
|
||||
newheaders.append((h, v))
|
||||
self._headers = newheaders
|
||||
|
||||
def get_content_charset(self, failobj=None):
|
||||
"""Return the charset parameter of the Content-Type header.
|
||||
|
||||
The returned string is always coerced to lower case. If there is no
|
||||
Content-Type header, or if that header has no charset parameter,
|
||||
failobj is returned.
|
||||
"""
|
||||
missing = object()
|
||||
charset = self.get_param('charset', missing)
|
||||
if charset is missing:
|
||||
return failobj
|
||||
if isinstance(charset, tuple):
|
||||
# RFC 2231 encoded, so decode it, and it better end up as ascii.
|
||||
pcharset = charset[0] or 'us-ascii'
|
||||
try:
|
||||
# LookupError will be raised if the charset isn't known to
|
||||
# Python. UnicodeError will be raised if the encoded text
|
||||
# contains a character not in the charset.
|
||||
as_bytes = charset[2].encode('raw-unicode-escape')
|
||||
charset = str(as_bytes, pcharset)
|
||||
except (LookupError, UnicodeError):
|
||||
charset = charset[2]
|
||||
# charset characters must be in us-ascii range
|
||||
try:
|
||||
charset.encode('us-ascii')
|
||||
except UnicodeError:
|
||||
return failobj
|
||||
# RFC 2046, $4.1.2 says charsets are not case sensitive
|
||||
return charset.lower()
|
||||
|
||||
def get_charsets(self, failobj=None):
|
||||
"""Return a list containing the charset(s) used in this message.
|
||||
|
||||
The returned list of items describes the Content-Type headers'
|
||||
charset parameter for this message and all the subparts in its
|
||||
payload.
|
||||
|
||||
Each item will either be a string (the value of the charset parameter
|
||||
in the Content-Type header of that part) or the value of the
|
||||
'failobj' parameter (defaults to None), if the part does not have a
|
||||
main MIME type of "text", or the charset is not defined.
|
||||
|
||||
The list will contain one string for each part of the message, plus
|
||||
one for the container message (i.e. self), so that a non-multipart
|
||||
message will still return a list of length 1.
|
||||
"""
|
||||
return [part.get_content_charset(failobj) for part in self.walk()]
|
||||
|
||||
# I.e. def walk(self): ...
|
||||
from email.iterators import walk
|
|
@ -0,0 +1,131 @@
|
|||
# Copyright (C) 2001-2007 Python Software Foundation
|
||||
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""A parser of RFC 2822 and MIME email messages."""
|
||||
|
||||
__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser']
|
||||
|
||||
import warnings
|
||||
from io import StringIO, TextIOWrapper
|
||||
|
||||
from email.feedparser import FeedParser, BytesFeedParser
|
||||
from email.message import Message
|
||||
from email._policybase import compat32
|
||||
|
||||
|
||||
|
||||
class Parser:
    def __init__(self, _class=Message, policy=compat32):
        """Parser of RFC 2822 and MIME email messages.

        Builds an in-memory object tree representing the email message,
        which can then be manipulated and handed to a Generator to
        produce the textual representation again.

        The input must be formatted as a block of RFC 2822 headers and
        header continuation lines, optionally preceded by a `Unix-from'
        header.  The header block is terminated either by the end of the
        input or by a blank line.

        _class is the factory used for new message objects; it must be
        callable with zero arguments.  Default is Message.

        The policy keyword specifies a policy object that controls a
        number of aspects of the parser's operation.  The default policy
        maintains backward compatibility.
        """
        self._class = _class
        self.policy = policy

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a file.

        Reads all the data from the file object and returns the root of
        the message structure.  When headersonly is true, parsing stops
        after the header block; by default the whole input is parsed.
        """
        feedparser = FeedParser(self._class, policy=self.policy)
        if headersonly:
            feedparser._set_headersonly()
        # Feed fixed-size chunks so arbitrarily large inputs are never
        # slurped into memory at once.
        while True:
            chunk = fp.read(8192)
            if not chunk:
                break
            feedparser.feed(chunk)
        return feedparser.close()

    def parsestr(self, text, headersonly=False):
        """Create a message structure from a string; see parse()."""
        return self.parse(StringIO(text), headersonly=headersonly)
|
||||
|
||||
|
||||
|
||||
class HeaderParser(Parser):
    """Parser that only parses the message headers.

    The rest of the input, if any, is left unparsed and becomes the
    (string) payload of the resulting message object.
    """

    def parse(self, fp, headersonly=True):
        """Parse only the headers from a file."""
        return super().parse(fp, True)

    def parsestr(self, text, headersonly=True):
        """Parse only the headers from a string."""
        return super().parsestr(text, True)
|
||||
|
||||
|
||||
class BytesParser:
    """Parser of binary RFC 2822 and MIME email messages.

    Thin wrapper around Parser: binary input is decoded with the ASCII
    codec using the surrogateescape error handler (so arbitrary bytes
    survive a round trip) and the resulting text is handed to a Parser.
    """

    def __init__(self, *args, **kw):
        """Accepts the same arguments as Parser (see that class)."""
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a binary file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.  The wrapper (and
        hence fp) is closed when parsing finishes.
        """
        wrapper = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
        try:
            return self.parser.parse(wrapper, headersonly)
        finally:
            wrapper.close()

    def parsebytes(self, text, headersonly=False):
        """Create a message structure from a byte string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the byte string.
        """
        decoded = text.decode('ASCII', errors='surrogateescape')
        return self.parser.parsestr(decoded, headersonly)
|
||||
|
||||
|
||||
class BytesHeaderParser(BytesParser):
    """BytesParser variant that parses only the message headers."""

    def parse(self, fp, headersonly=True):
        """Parse only the headers from a binary file."""
        return super().parse(fp, headersonly=True)

    def parsebytes(self, text, headersonly=True):
        """Parse only the headers from a byte string."""
        return super().parsebytes(text, headersonly=True)
|
|
@ -0,0 +1,322 @@
|
|||
# Copyright (C) 2001-2006 Python Software Foundation
|
||||
# Author: Ben Gertzfield
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Quoted-printable content transfer encoding per RFCs 2045-2047.
|
||||
|
||||
This module handles the content transfer encoding method defined in RFC 2045
|
||||
to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to
|
||||
safely encode text that is in a character set similar to the 7-bit US ASCII
|
||||
character set, but that includes some 8-bit characters that are normally not
|
||||
allowed in email bodies or headers.
|
||||
|
||||
Quoted-printable is very space-inefficient for encoding binary files; use the
|
||||
email.base64mime module for that instead.
|
||||
|
||||
This module provides an interface to encode and decode both headers and bodies
|
||||
with quoted-printable encoding.
|
||||
|
||||
RFC 2045 defines a method for including character set information in an
|
||||
`encoded-word' in a header. This method is commonly used for 8-bit real names
|
||||
in To:/From:/Cc: etc. fields, as well as Subject: lines.
|
||||
|
||||
This module does not do the line wrapping or end-of-line character
|
||||
conversion necessary for proper internationalized headers; it only
|
||||
does dumb encoding and decoding. To deal with the various line
|
||||
wrapping issues, use the email.header module.
|
||||
"""
|
||||
|
||||
__all__ = [
|
||||
'body_decode',
|
||||
'body_encode',
|
||||
'body_length',
|
||||
'decode',
|
||||
'decodestring',
|
||||
'header_decode',
|
||||
'header_encode',
|
||||
'header_length',
|
||||
'quote',
|
||||
'unquote',
|
||||
]
|
||||
|
||||
import re
|
||||
import io
|
||||
|
||||
from string import ascii_letters, digits, hexdigits
|
||||
|
||||
# Line-ending and join constants used throughout this module.
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# Build a mapping of octets to the expansion of that octet.  Since we're only
# going to have 256 of these things, this isn't terribly inefficient
# space-wise.  Remember that headers and bodies have different sets of safe
# characters.  Initialize both maps with the full expansion, and then override
# the safe bytes with the more compact form.
_QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256))
_QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy()

# Safe header bytes which need no encoding.
for c in b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii'):
    _QUOPRI_HEADER_MAP[c] = chr(c)
# Headers have one other special encoding; spaces become underscores.
_QUOPRI_HEADER_MAP[ord(' ')] = '_'

# Safe body bytes which need no encoding.
for c in (b' !"#$%&\'()*+,-./0123456789:;<>'
          b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`'
          b'abcdefghijklmnopqrstuvwxyz{|}~\t'):
    _QUOPRI_BODY_MAP[c] = chr(c)
|
||||
|
||||
|
||||
|
||||
# Helpers
|
||||
def header_check(octet):
    """Return True if the octet should be escaped with header quopri."""
    # An octet needs escaping exactly when its header-map expansion is not
    # the literal character itself.
    return _QUOPRI_HEADER_MAP[octet] != chr(octet)
|
||||
|
||||
|
||||
def body_check(octet):
    """Return True if the octet should be escaped with body quopri."""
    # Escape whenever the body-map expansion differs from the raw character.
    return _QUOPRI_BODY_MAP[octet] != chr(octet)
|
||||
|
||||
|
||||
def header_length(bytearray):
    """Return a header quoted-printable encoding length.

    Note that this does not include any RFC 2047 chrome added by
    `header_encode()`.

    :param bytearray: An array of bytes (a.k.a. octets).
    :return: The length in bytes of the byte array when it is encoded with
        quoted-printable for headers.
    """
    total = 0
    for octet in bytearray:
        total += len(_QUOPRI_HEADER_MAP[octet])
    return total
|
||||
|
||||
|
||||
def body_length(bytearray):
    """Return a body quoted-printable encoding length.

    :param bytearray: An array of bytes (a.k.a. octets).
    :return: The length in bytes of the byte array when it is encoded with
        quoted-printable for bodies.
    """
    total = 0
    for octet in bytearray:
        total += len(_QUOPRI_BODY_MAP[octet])
    return total
|
||||
|
||||
|
||||
def _max_append(L, s, maxlen, extra=''):
|
||||
if not isinstance(s, str):
|
||||
s = chr(s)
|
||||
if not L:
|
||||
L.append(s.lstrip())
|
||||
elif len(L[-1]) + len(s) <= maxlen:
|
||||
L[-1] += extra + s
|
||||
else:
|
||||
L.append(s.lstrip())
|
||||
|
||||
|
||||
def unquote(s):
    """Turn a string in the form =AB to the ASCII character with value 0xab"""
    hexpair = s[1:3]
    return chr(int(hexpair, 16))
|
||||
|
||||
|
||||
def quote(c):
    """Return the =XX quoted-printable form of character c."""
    return '={:02X}'.format(ord(c))
|
||||
|
||||
|
||||
|
||||
def header_encode(header_bytes, charset='iso-8859-1'):
    """Encode a single header line with quoted-printable (like) encoding.

    Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but
    used specifically for email header fields to allow charsets with mostly 7
    bit characters (and some 8 bit) to remain more or less readable in non-RFC
    2045 aware mail clients.

    charset names the character set to use in the RFC 2046 header.  It
    defaults to iso-8859-1.
    """
    # Return empty headers as an empty string.
    if not header_bytes:
        return ''
    # Map every octet through the header table, then add the RFC chrome.
    encoded = EMPTYSTRING.join(_QUOPRI_HEADER_MAP[octet]
                               for octet in header_bytes)
    return '=?%s?q?%s?=' % (charset, encoded)
|
||||
|
||||
|
||||
class _body_accumulator(io.StringIO):
    """StringIO that tracks how much room is left on the current line.

    Helper for body_encode(): accumulates quoted-printable output while
    counting down self.room as the current line fills, inserting soft line
    breaks ("=" followed by eol) when a piece would not fit.
    """

    def __init__(self, maxlinelen, eol, *args, **kw):
        super().__init__(*args, **kw)
        self.eol = eol
        # room counts down as the current line fills; newline() resets it.
        self.maxlinelen = self.room = maxlinelen

    def write_str(self, s):
        """Add string s to the accumulated body."""
        self.write(s)
        self.room -= len(s)

    def newline(self):
        """Write eol, then start new line."""
        self.write_str(self.eol)
        self.room = self.maxlinelen

    def write_soft_break(self):
        """Write a soft break, then start a new line."""
        self.write_str('=')
        self.newline()

    def write_wrapped(self, s, extra_room=0):
        """Add a soft line break if needed, then write s."""
        if self.room < len(s) + extra_room:
            self.write_soft_break()
        self.write_str(s)

    def write_char(self, c, is_last_char):
        """Write one character (possibly in already-quoted "=XX" form).

        is_last_char marks the final character of the input line; trailing
        whitespace there gets special handling, since the branches below
        quote it or push it onto its own line instead of leaving it bare at
        end-of-line.
        """
        if not is_last_char:
            # Another character follows on this line, so we must leave
            # extra room, either for it or a soft break, and whitespace
            # need not be quoted.
            self.write_wrapped(c, extra_room=1)
        elif c not in ' \t':
            # For this and remaining cases, no more characters follow,
            # so there is no need to reserve extra room (since a hard
            # break will immediately follow).
            self.write_wrapped(c)
        elif self.room >= 3:
            # It's a whitespace character at end-of-line, and we have room
            # for the three-character quoted encoding.
            self.write(quote(c))
        elif self.room == 2:
            # There's room for the whitespace character and a soft break.
            self.write(c)
            self.write_soft_break()
        else:
            # There's room only for a soft break.  The quoted whitespace
            # will be the only content on the subsequent line.
            self.write_soft_break()
            self.write(quote(c))
|
||||
|
||||
|
||||
def body_encode(body, maxlinelen=76, eol=NL):
    """Encode with quoted-printable, wrapping at maxlinelen characters.

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    Each line will be wrapped at, at most, maxlinelen characters before the
    eol string (maxlinelen defaults to 76 characters, the maximum value
    permitted by RFC 2045).  Long lines will have the 'soft line break'
    quoted-printable character "=" appended to them, so the decoded text will
    be identical to the original text.

    The minimum maxlinelen is 4 to have room for a quoted character ("=XX")
    followed by a soft line break.  Smaller values will generate a
    ValueError.

    """

    if maxlinelen < 4:
        raise ValueError("maxlinelen must be at least 4")
    if not body:
        return body

    # The last line may or may not end in eol, but all other lines do.
    last_has_eol = (body[-1] in '\r\n')

    # This accumulator will make it easier to build the encoded body.
    encoded_body = _body_accumulator(maxlinelen, eol)

    lines = body.splitlines()
    last_line_no = len(lines) - 1
    for line_no, line in enumerate(lines):
        last_char_index = len(line) - 1
        for i, c in enumerate(line):
            # Quote any octet that is unsafe in a quoted-printable body.
            if body_check(ord(c)):
                c = quote(c)
            encoded_body.write_char(c, i==last_char_index)
        # Add an eol if input line had eol.  All input lines have eol except
        # possibly the last one.
        if line_no < last_line_no or last_has_eol:
            encoded_body.newline()

    return encoded_body.getvalue()
|
||||
|
||||
|
||||
|
||||
# BAW: I'm not sure if the intent was for the signature of this function to be
# the same as base64MIME.decode() or not...
def decode(encoded, eol=NL):
    """Decode a quoted-printable string.

    Lines are separated with eol, which defaults to \\n.
    """
    if not encoded:
        return encoded
    # BAW: see comment in encode() above.  Again, we're building up the
    # decoded string with string concatenation, which could be done much more
    # efficiently.
    decoded = ''

    for line in encoded.splitlines():
        # Trailing whitespace on an encoded line is transport padding.
        line = line.rstrip()
        if not line:
            decoded += eol
            continue

        i = 0
        n = len(line)
        while i < n:
            c = line[i]
            if c != '=':
                decoded += c
                i += 1
            # Otherwise, c == "=".  Are we at the end of the line?  If so, add
            # a soft line break.
            elif i+1 == n:
                # `continue` skips the eol-append below, so a soft break
                # joins this line with the next one.
                i += 1
                continue
            # Decode if in form =AB
            elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits:
                decoded += unquote(line[i:i+3])
                i += 3
            # Otherwise, not in form =AB, pass literally
            else:
                decoded += c
                i += 1

            # Reached end of line via a hard break: emit eol.
            if i == n:
                decoded += eol
    # Special case if original string did not end with eol
    if encoded[-1] not in '\r\n' and decoded.endswith(eol):
        decoded = decoded[:-1]
    return decoded


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode
|
||||
|
||||
|
||||
|
||||
def _unquote_match(match):
|
||||
"""Turn a match in the form =AB to the ASCII character with value 0xab"""
|
||||
s = match.group(0)
|
||||
return unquote(s)
|
||||
|
||||
|
||||
# Header decoding is done a bit differently
|
||||
# Header decoding is done a bit differently
def header_decode(s):
    """Decode a string encoded with RFC 2045 MIME header `Q' encoding.

    This function does not parse a full MIME header value encoded with
    quoted-printable (like =?iso-8895-1?q?Hello_World?=) -- please use
    the high level email.header class for that functionality.
    """
    # Underscores encode spaces in the Q encoding.
    s = s.replace('_', ' ')
    # Expand each =XX escape to the character with that hex value.
    return re.sub(r'=[a-fA-F0-9]{2}',
                  lambda mo: chr(int(mo.group(0)[1:3], 16)),
                  s, flags=re.ASCII)
|
|
@ -0,0 +1,391 @@
|
|||
# Copyright (C) 2001-2010 Python Software Foundation
|
||||
# Author: Barry Warsaw
|
||||
# Contact: email-sig@python.org
|
||||
|
||||
"""Miscellaneous utilities."""
|
||||
|
||||
__all__ = [
|
||||
'collapse_rfc2231_value',
|
||||
'decode_params',
|
||||
'decode_rfc2231',
|
||||
'encode_rfc2231',
|
||||
'formataddr',
|
||||
'formatdate',
|
||||
'format_datetime',
|
||||
'getaddresses',
|
||||
'make_msgid',
|
||||
'mktime_tz',
|
||||
'parseaddr',
|
||||
'parsedate',
|
||||
'parsedate_tz',
|
||||
'parsedate_to_datetime',
|
||||
'unquote',
|
||||
]
|
||||
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import base64
|
||||
import random
|
||||
import socket
|
||||
import datetime
|
||||
import urllib.parse
|
||||
import warnings
|
||||
from io import StringIO
|
||||
|
||||
from email._parseaddr import quote
|
||||
from email._parseaddr import AddressList as _AddressList
|
||||
from email._parseaddr import mktime_tz
|
||||
|
||||
from email._parseaddr import parsedate, parsedate_tz, _parsedate_tz
|
||||
|
||||
from quopri import decodestring as _qdecode
|
||||
|
||||
# Intrapackage imports
|
||||
from email.encoders import _bencode, _qencode
|
||||
from email.charset import Charset
|
||||
|
||||
COMMASPACE = ', '
|
||||
EMPTYSTRING = ''
|
||||
UEMPTYSTRING = ''
|
||||
CRLF = '\r\n'
|
||||
TICK = "'"
|
||||
|
||||
specialsre = re.compile(r'[][\\()<>@,:;".]')
|
||||
escapesre = re.compile(r'[\\"]')
|
||||
|
||||
# How to figure out if we are processing strings that come from a byte
|
||||
# source with undecodable characters.
|
||||
_has_surrogates = re.compile(
|
||||
'([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search
|
||||
|
||||
# How to deal with a string containing bytes before handing it to the
|
||||
# application through the 'normal' interface.
|
||||
def _sanitize(string):
|
||||
# Turn any escaped bytes into unicode 'unknown' char.
|
||||
original_bytes = string.encode('ascii', 'surrogateescape')
|
||||
return original_bytes.decode('ascii', 'replace')
|
||||
|
||||
|
||||
# Helpers
|
||||
|
||||
def formataddr(pair, charset='utf-8'):
    """The inverse of parseaddr(), this takes a 2-tuple of the form
    (realname, email_address) and returns the string value suitable
    for an RFC 2822 From, To or Cc header.

    If the first element of pair is false, then the second element is
    returned unmodified.

    Optional charset if given is the character set that is used to encode
    realname in case realname is not ASCII safe.  Can be an instance of str or
    a Charset-like object which has a header_encode method.  Default is
    'utf-8'.
    """
    name, address = pair
    # The address MUST (per RFC) be ascii, so raise a UnicodeError if it isn't.
    address.encode('ascii')
    if not name:
        return address
    try:
        name.encode('ascii')
    except UnicodeEncodeError:
        # Non-ascii realname: RFC 2047-encode it with the given charset.
        if isinstance(charset, str):
            charset = Charset(charset)
        return "%s <%s>" % (charset.header_encode(name), address)
    # Ascii realname: quote it if it contains specials, escaping as needed.
    quotes = '"' if specialsre.search(name) else ''
    name = escapesre.sub(r'\\\g<0>', name)
    return '%s%s%s <%s>' % (quotes, name, quotes, address)
|
||||
|
||||
|
||||
|
||||
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
    # Join all field values into one header-like string and let the
    # address parser split it back into (realname, email) pairs.
    return _AddressList(', '.join(fieldvalues)).addresslist
|
||||
|
||||
|
||||
|
||||
ecre = re.compile(r'''
|
||||
=\? # literal =?
|
||||
(?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
|
||||
\? # literal ?
|
||||
(?P<encoding>[qb]) # either a "q" or a "b", case insensitive
|
||||
\? # literal ?
|
||||
(?P<atom>.*?) # non-greedy up to the next ?= is the atom
|
||||
\?= # literal ?=
|
||||
''', re.VERBOSE | re.IGNORECASE)
|
||||
|
||||
|
||||
def _format_timetuple_and_zone(timetuple, zone):
|
||||
return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
|
||||
['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]],
|
||||
timetuple[2],
|
||||
['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
|
||||
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][timetuple[1] - 1],
|
||||
timetuple[0], timetuple[3], timetuple[4], timetuple[5],
|
||||
zone)
|
||||
|
||||
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Returns a date string as specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    Optional timeval if given is a floating point time value as accepted by
    gmtime() and localtime(), otherwise the current time is used.

    Optional localtime is a flag that when True, interprets timeval, and
    returns a date relative to the local timezone instead of UTC, properly
    taking daylight savings time into account.

    Optional argument usegmt means that the timezone is written out as
    an ascii string, not numeric one (so "GMT" instead of "+0000"). This
    is needed for HTTP, and is only used when localtime==False.
    """
    # Note: we cannot use strftime() because that honors the locale and RFC
    # 2822 requires that day and month names be the English abbreviations.
    if timeval is None:
        timeval = time.time()
    if localtime:
        now = time.localtime(timeval)
        # Calculate timezone offset, based on whether the local zone has
        # daylight savings time, and whether DST is in effect.
        if time.daylight and now[-1]:
            offset = time.altzone
        else:
            offset = time.timezone
        # divmod by 3600 leaves the remainder in *seconds*; the
        # "minutes // 60" below converts that remainder to whole minutes.
        hours, minutes = divmod(abs(offset), 3600)
        # Remember offset is in seconds west of UTC, but the timezone is in
        # minutes east of UTC, so the signs differ.
        if offset > 0:
            sign = '-'
        else:
            sign = '+'
        zone = '%s%02d%02d' % (sign, hours, minutes // 60)
    else:
        now = time.gmtime(timeval)
        # Timezone offset is always -0000
        if usegmt:
            zone = 'GMT'
        else:
            zone = '-0000'
    return _format_timetuple_and_zone(now, zone)
|
||||
|
||||
def format_datetime(dt, usegmt=False):
    """Turn a datetime into a date string as specified in RFC 2822.

    If usegmt is True, dt must be an aware datetime with an offset of zero.  In
    this case 'GMT' will be rendered instead of the normal +0000 required by
    RFC2822.  This is to support HTTP headers involving date stamps.
    """
    if usegmt:
        if dt.tzinfo is None or dt.tzinfo != datetime.timezone.utc:
            raise ValueError("usegmt option requires a UTC datetime")
        zone = 'GMT'
    elif dt.tzinfo is None:
        # Naive datetimes render the RFC 2822 "unknown zone" marker.
        zone = '-0000'
    else:
        zone = dt.strftime("%z")
    return _format_timetuple_and_zone(dt.timetuple(), zone)
|
||||
|
||||
|
||||
def make_msgid(idstring=None, domain=None):
    """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    Optional idstring if given is a string used to strengthen the
    uniqueness of the message id.  Optional domain if given provides the
    portion of the message id after the '@'.  It defaults to the locally
    defined hostname.
    """
    utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    # Pid plus a random number make collisions unlikely within one second.
    idstring = '' if idstring is None else '.' + idstring
    if domain is None:
        domain = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (utcdate, os.getpid(),
                                random.randrange(100000), idstring, domain)
|
||||
|
||||
|
||||
def parsedate_to_datetime(data):
|
||||
*dtuple, tz = _parsedate_tz(data)
|
||||
if tz is None:
|
||||
return datetime.datetime(*dtuple[:6])
|
||||
return datetime.datetime(*dtuple[:6],
|
||||
tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
|
||||
|
||||
|
||||
def parseaddr(addr):
    """Parse addr into a (realname, email_address) 2-tuple.

    Returns ('', '') when no address can be parsed.
    """
    parsed = _AddressList(addr).addresslist
    return parsed[0] if parsed else ('', '')
|
||||
|
||||
|
||||
# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
|
||||
# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove quotes from a string."""
    if len(str) > 1:
        double_quoted = str.startswith('"') and str.endswith('"')
        if double_quoted:
            # Strip the quotes and un-escape backslashes and inner quotes.
            return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
        if str.startswith('<') and str.endswith('>'):
            return str[1:-1]
    return str
|
||||
|
||||
|
||||
|
||||
# RFC2231-related functions - parameter encoding and decoding
|
||||
# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Decode string according to RFC 2231"""
    # An extended value looks like charset'language'text.
    parts = s.split("'", 2)
    return parts if len(parts) == 3 else (None, None, s)
|
||||
|
||||
|
||||
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    If neither charset nor language is given, then s is returned as-is.  If
    charset is given but not language, the string is encoded using the empty
    string for language.
    """
    # Percent-encode everything that is not URL-safe.
    s = urllib.parse.quote(s, safe='', encoding=charset or 'ascii')
    if charset is None and language is None:
        return s
    return "%s'%s'%s" % (charset, language or '', s)
|
||||
|
||||
|
||||
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$',
|
||||
re.ASCII)
|
||||
|
||||
def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).
    """
    # Copy params so we don't mess with the original
    params = params[:]
    new_params = []
    # Map parameter's name to a list of continuations.  The values are a
    # 3-tuple of the continuation number, the string value, and a flag
    # specifying whether a particular segment is %-encoded.
    rfc2231_params = {}
    # The first element is passed through unchanged (it is the value part of
    # the header, not a name=value parameter).
    name, value = params.pop(0)
    new_params.append((name, value))
    while params:
        name, value = params.pop(0)
        # A trailing '*' on the name marks a %-encoded (extended) segment.
        if name.endswith('*'):
            encoded = True
        else:
            encoded = False
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo:
            # name*N or name*N* continuation segment.
            name, num = mo.group('name', 'num')
            if num is not None:
                num = int(num)
            rfc2231_params.setdefault(name, []).append((num, value, encoded))
        else:
            # Plain parameter; re-quote and pass through.
            new_params.append((name, '"%s"' % quote(value)))
    if rfc2231_params:
        for name, continuations in rfc2231_params.items():
            value = []
            extended = False
            # Sort by number
            continuations.sort()
            # And now append all values in numerical order, converting
            # %-encodings for the encoded segments.  If any of the
            # continuation names ends in a *, then the entire string, after
            # decoding segments and concatenating, must have the charset and
            # language specifiers at the beginning of the string.
            for num, s, encoded in continuations:
                if encoded:
                    # Decode as "latin-1", so the characters in s directly
                    # represent the percent-encoded octet values.
                    # collapse_rfc2231_value treats this as an octet sequence.
                    s = urllib.parse.unquote(s, encoding="latin-1")
                    extended = True
                value.append(s)
            value = quote(EMPTYSTRING.join(value))
            if extended:
                charset, language, value = decode_rfc2231(value)
                new_params.append((name, (charset, language, '"%s"' % value)))
            else:
                new_params.append((name, '"%s"' % value))
    return new_params
|
||||
|
||||
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse an RFC 2231 (charset, language, text) triple to a string.

    A non-triple value is simply unquoted and returned.  An unknown charset
    falls back to returning the unquoted text unchanged.
    """
    if not isinstance(value, tuple) or len(value) != 3:
        return unquote(value)
    charset, language, text = value
    # While text comes to us as a unicode string, we need it to be bytes.
    # raw-unicode-escape gives a straight interpretation of the string's
    # code points as byte values, not a utf-8 encoding.
    rawbytes = text.encode('raw-unicode-escape')
    try:
        return str(rawbytes, charset, errors)
    except LookupError:
        # charset is not a known codec.
        return unquote(text)
|
||||
|
||||
|
||||
#
|
||||
# datetime doesn't provide a localtime function yet, so provide one. Code
|
||||
# adapted from the patch in issue 9527. This may not be perfect, but it is
|
||||
# better than not having it.
|
||||
#
|
||||
|
||||
def localtime(dt=None, isdst=-1):
    """Return local time as an aware datetime object.

    If called without arguments, return current time.  Otherwise *dt*
    argument should be a datetime instance, and it is converted to the
    local time zone according to the system time zone database.  If *dt* is
    naive (that is, dt.tzinfo is None), it is assumed to be in local time.
    In this case, a positive or zero value for *isdst* causes localtime to
    presume initially that summer time (for example, Daylight Saving Time)
    is or is not (respectively) in effect for the specified time.  A
    negative value for *isdst* causes the localtime() function to attempt
    to divine whether summer time is in effect for the specified time.

    """
    if dt is None:
        return datetime.datetime.now(datetime.timezone.utc).astimezone()
    if dt.tzinfo is not None:
        return dt.astimezone()
    # We have a naive datetime.  Convert to a (localtime) timetuple and pass to
    # system mktime together with the isdst hint.  System mktime will return
    # seconds since epoch.
    tm = dt.timetuple()[:-1] + (isdst,)
    seconds = time.mktime(tm)
    localtm = time.localtime(seconds)
    try:
        # tm_gmtoff/tm_zone are platform-dependent struct_time attributes.
        delta = datetime.timedelta(seconds=localtm.tm_gmtoff)
        tz = datetime.timezone(delta, localtm.tm_zone)
    except AttributeError:
        # Compute UTC offset and compare with the value implied by tm_isdst.
        # If the values match, use the zone name implied by tm_isdst.
        delta = dt - datetime.datetime(*time.gmtime(seconds)[:6])
        dst = time.daylight and localtm.tm_isdst > 0
        gmtoff = -(time.altzone if dst else time.timezone)
        if delta == datetime.timedelta(seconds=gmtoff):
            tz = datetime.timezone(delta, time.tzname[dst])
        else:
            tz = datetime.timezone(delta)
    return dt.replace(tzinfo=tz)
|
|
@ -0,0 +1,38 @@
|
|||
# Subset of POSIX/Linux errno values.
EPERM = 1  # Operation not permitted
ENOENT = 2  # No such file or directory
ESRCH = 3  # No such process
EINTR = 4  # Interrupted system call
EIO = 5  # I/O error
ENXIO = 6  # No such device or address
E2BIG = 7  # Argument list too long
ENOEXEC = 8  # Exec format error
EBADF = 9  # Bad file number
ECHILD = 10  # No child processes
EAGAIN = 11  # Try again
ENOMEM = 12  # Out of memory
EACCES = 13  # Permission denied
EFAULT = 14  # Bad address
ENOTBLK = 15  # Block device required
EBUSY = 16  # Device or resource busy
EEXIST = 17  # File exists
EXDEV = 18  # Cross-device link
ENODEV = 19  # No such device
ENOTDIR = 20  # Not a directory
EISDIR = 21  # Is a directory
EINVAL = 22  # Invalid argument
ENFILE = 23  # File table overflow
EMFILE = 24  # Too many open files
ENOTTY = 25  # Not a typewriter
ETXTBSY = 26  # Text file busy
EFBIG = 27  # File too large
ENOSPC = 28  # No space left on device
ESPIPE = 29  # Illegal seek
EROFS = 30  # Read-only file system
EMLINK = 31  # Too many links
EPIPE = 32  # Broken pipe
EDOM = 33  # Math argument out of domain of func
ERANGE = 34  # Math result not representable
EAFNOSUPPORT = 97  # Address family not supported by protocol
ECONNRESET = 104  # Connection reset by peer
ETIMEDOUT = 110  # Connection timed out
EINPROGRESS = 115  # Operation now in progress
|
|
@ -0,0 +1,13 @@
|
|||
"""Extract the tar archive named on the command line into the current dir."""
import sys
import os
import shutil
import utarfile

t = utarfile.TarFile(sys.argv[1])
for i in t:
    print(i)
    if i.type == utarfile.DIRTYPE:
        os.makedirs(i.name)
    else:
        src = t.extractfile(i)
        # Bug fix: the output file was opened inline and never closed,
        # leaking the handle and risking unflushed data on ports without
        # reference counting.  Use a context manager to close it.
        with open(i.name, "wb") as dest:
            shutil.copyfileobj(src, dest)
|
|
@ -0,0 +1,28 @@
|
|||
#
|
||||
# uaiohttpclient - fetch URL passed as command line argument.
|
||||
#
|
||||
import uasyncio as asyncio
|
||||
import uaiohttpclient as aiohttp
|
||||
|
||||
|
||||
def print_stream(resp):
|
||||
print((yield from resp.read()))
|
||||
return
|
||||
while True:
|
||||
line = yield from reader.readline()
|
||||
if not line:
|
||||
break
|
||||
print(line.rstrip())
|
||||
|
||||
def run(url):
    """Fetch *url* with uaiohttpclient, print the response object, then
    stream/print the body (coroutine)."""
    resp = yield from aiohttp.request("GET", url)
    print(resp)
    yield from print_stream(resp)
|
||||
|
||||
import sys
import logging

# Usage: <script> URL — fetch the URL on the uasyncio event loop and
# print status plus body.
logging.basicConfig(level=logging.INFO)
url = sys.argv[1]
loop = asyncio.get_event_loop()
loop.run_until_complete(run(url))
loop.close()
|
|
@ -0,0 +1,37 @@
|
|||
import ffi
|
||||
import os
|
||||
import ffilib
|
||||
|
||||
|
||||
libc = ffilib.libc()
|
||||
|
||||
fcntl_l = libc.func("i", "fcntl", "iil")
|
||||
fcntl_s = libc.func("i", "fcntl", "iip")
|
||||
ioctl_l = libc.func("i", "ioctl", "iil")
|
||||
ioctl_s = libc.func("i", "ioctl", "iip")
|
||||
|
||||
|
||||
def fcntl(fd, op, arg=0):
    """fcntl(2) wrapper via libc FFI.

    Dispatches on the type of *arg*: ints go through the long-argument
    binding (fcntl_l), anything else (a buffer) through the pointer
    binding (fcntl_s).  os.check_error raises OSError on failure.
    """
    if type(arg) is int:
        r = fcntl_l(fd, op, arg)
        os.check_error(r)
        return r
    else:
        r = fcntl_s(fd, op, arg)
        os.check_error(r)
        # TODO: Not compliant. CPython says that arg should be immutable,
        # and possibly mutated buffer is returned.
        return r
|
||||
|
||||
|
||||
def ioctl(fd, op, arg=0, mut=False):
    """ioctl(2) wrapper via libc FFI.

    Int *arg* uses the long-argument binding; buffer args use the pointer
    binding and require mut=True, since the buffer is mutated in place
    (immutable-buffer support is not implemented — see TODO below).
    os.check_error raises OSError on failure.
    """
    if type(arg) is int:
        r = ioctl_l(fd, op, arg)
        os.check_error(r)
        return r
    else:
        # TODO
        assert mut
        r = ioctl_s(fd, op, arg)
        os.check_error(r)
        return r
|
|
@ -0,0 +1,46 @@
|
|||
import sys
|
||||
try:
|
||||
import ffi
|
||||
except ImportError:
|
||||
ffi = None
|
||||
|
||||
_cache = {}
|
||||
|
||||
def open(name, maxver=10, extra=()):
    """Open shared library *name*, trying platform-specific filename
    variants; successful handles are cached per *name*.

    Returns None when the ffi module is unavailable; raises the last
    OSError when no candidate filename loads.
    """
    if not ffi:
        return None
    try:
        return _cache[name]
    except KeyError:
        pass
    def libs():
        # Generate candidate filenames: bare .so first, then versioned
        # .so.N from maxver down to 0 on Linux; .dylib/.dll elsewhere;
        # finally any caller-supplied extras.
        if sys.platform == "linux":
            yield '%s.so' % name
            for i in range(maxver, -1, -1):
                yield '%s.so.%u' % (name, i)
        else:
            for ext in ('dylib', 'dll'):
                yield '%s.%s' % (name, ext)
        for n in extra:
            yield n
    err = None
    for n in libs():
        try:
            l = ffi.open(n)
            _cache[name] = l
            return l
        except OSError as e:
            err = e
    raise err
|
||||
|
||||
def libc():
    """Return a (cached) FFI handle to the C library, trying libc.so
    through libc.so.6."""
    return open("libc", 6)

# Find out bitness of the platform, even if long ints are not supported
# TODO: All bitness differences should be removed from micropython-lib, and
# this snippet too.
bitness = 1
v = sys.maxsize
while v:
    bitness += 1
    v >>= 1
|
|
@ -0,0 +1,111 @@
|
|||
"""Filename matching with shell patterns.
|
||||
|
||||
fnmatch(FILENAME, PATTERN) matches according to the local convention.
|
||||
fnmatchcase(FILENAME, PATTERN) always takes case in account.
|
||||
|
||||
The functions operate by translating the pattern into a regular
|
||||
expression. They cache the compiled regular expressions for speed.
|
||||
|
||||
The function translate(PATTERN) returns a regular expression
|
||||
corresponding to PATTERN. (It does not compile it.)
|
||||
"""
|
||||
import os
|
||||
import os.path
|
||||
import posixpath
|
||||
import re
|
||||
#import functools
|
||||
|
||||
__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
|
||||
|
||||
def fnmatch(name, pat):
    """Test whether FILENAME matches PATTERN.

    Patterns are Unix shell style:

    * matches everything
    ? matches any single character
    [seq] matches any character in seq
    [!seq] matches any char not in seq

    An initial period in FILENAME is not special.  Both FILENAME and
    PATTERN are first case-normalized if the operating system requires
    it; use fnmatchcase(FILENAME, PATTERN) to skip normalization.
    """
    return fnmatchcase(os.path.normcase(name), os.path.normcase(pat))
|
||||
|
||||
#@functools.lru_cache(maxsize=256, typed=True)
|
||||
def _compile_pattern(pat):
    """Return a compiled regex ``match`` function for shell pattern *pat*.

    bytes patterns are round-tripped through Latin-1 so that translate(),
    which operates on str, can be reused unchanged.
    """
    if isinstance(pat, bytes):
        regex = bytes(translate(str(pat, 'ISO-8859-1')), 'ISO-8859-1')
    else:
        regex = translate(pat)
    return re.compile(regex).match
|
||||
|
||||
def filter(names, pat):
    """Return the subset of the list NAMES that match PAT."""
    pat = os.path.normcase(pat)
    match = _compile_pattern(pat)
    if os.path is posixpath:
        # normcase on posix is a no-op; skip the per-name call.
        return [name for name in names if match(name)]
    return [name for name in names if match(os.path.normcase(name))]
|
||||
|
||||
def fnmatchcase(name, pat):
    """Test whether FILENAME matches PATTERN, including case.

    This is a version of fnmatch() which doesn't case-normalize
    its arguments.
    """
    return _compile_pattern(pat)(name) is not None
|
||||
|
||||
|
||||
def translate(pat):
    """Translate a shell PATTERN to a regular expression string.

    There is no way to quote meta-characters.  Fix: the final anchor is
    now the raw string r'\Z' — the previous non-raw '\Z' relied on an
    invalid escape sequence being passed through (SyntaxWarning on
    modern CPython).
    """
    i, n = 0, len(pat)
    res = ''
    while i < n:
        c = pat[i]
        i = i+1
        if c == '*':
            res = res + '.*'
        elif c == '?':
            res = res + '.'
        elif c == '[':
            # Find the closing ']'; '!' or ']' directly after '[' are
            # literal members of the class, not terminators.
            j = i
            if j < n and pat[j] == '!':
                j = j+1
            if j < n and pat[j] == ']':
                j = j+1
            while j < n and pat[j] != ']':
                j = j+1
            if j >= n:
                # Unterminated class: treat '[' as a literal character.
                res = res + '\\['
            else:
                stuff = pat[i:j].replace('\\', '\\\\')
                i = j+1
                if stuff[0] == '!':
                    stuff = '^' + stuff[1:]   # shell negation -> regex '^'
                elif stuff[0] == '^':
                    stuff = '\\' + stuff      # literal leading '^'
                res = '%s[%s]' % (res, stuff)
        else:
            res = res + re.escape(c)
    # Original patterns is undefined, see http://bugs.python.org/issue21464
    return '(?ms)' + res + r'\Z'
|
|
@ -0,0 +1,27 @@
|
|||
def partial(func, *args, **kwargs):
    """Return a callable with *args*/*kwargs* pre-bound to *func*.

    Call-time keyword arguments override the pre-bound ones, matching
    functools.partial semantics.
    """
    def _bound(*extra_args, **extra_kwargs):
        merged = dict(kwargs)
        merged.update(extra_kwargs)
        return func(*(args + extra_args), **merged)
    return _bound
|
||||
|
||||
|
||||
def update_wrapper(wrapper, wrapped, assigned=None, updated=None):
    """Dummy stand-in for functools.update_wrapper.

    Unlike CPython it copies no metadata (__name__, __doc__, ...);
    it returns *wrapper* unchanged and ignores *assigned*/*updated*.
    """
    # Dummy impl
    return wrapper
|
||||
|
||||
|
||||
def wraps(wrapped, assigned=None, updated=None):
    """Dummy stand-in for functools.wraps: a decorator that returns the
    decorated function unchanged (no metadata copying); all arguments
    are ignored."""
    def _identity(user_function):
        return user_function
    return _identity
|
||||
|
||||
|
||||
_REDUCE_MISSING = object()  # sentinel: distinguishes "no initializer" from None


def reduce(function, iterable, initializer=_REDUCE_MISSING):
    """Apply *function* of two arguments cumulatively to *iterable*,
    left to right, reducing it to a single value.

    Fixes vs the previous version (matching CPython functools.reduce):
    an explicit ``initializer=None`` is now used as the initial value
    instead of being silently ignored, and an empty iterable with no
    initializer raises TypeError rather than leaking StopIteration.
    """
    it = iter(iterable)
    if initializer is _REDUCE_MISSING:
        try:
            value = next(it)
        except StopIteration:
            raise TypeError('reduce() of empty iterable with no initial value')
    else:
        value = initializer
    for element in it:
        value = function(value, element)
    return value
|
|
@ -0,0 +1,215 @@
|
|||
"""Parser for command line options.
|
||||
|
||||
This module helps scripts to parse the command line arguments in
|
||||
sys.argv. It supports the same conventions as the Unix getopt()
|
||||
function (including the special meanings of arguments of the form `-'
|
||||
and `--'). Long options similar to those supported by GNU software
|
||||
may be used as well via an optional third argument. This module
|
||||
provides two functions and an exception:
|
||||
|
||||
getopt() -- Parse command line options
|
||||
gnu_getopt() -- Like getopt(), but allow option and non-option arguments
|
||||
to be intermixed.
|
||||
GetoptError -- exception (class) raised with 'opt' attribute, which is the
|
||||
option involved with the exception.
|
||||
"""
|
||||
|
||||
# Long option support added by Lars Wirzenius <liw@iki.fi>.
|
||||
#
|
||||
# Gerrit Holl <gerrit@nl.linux.org> moved the string-based exceptions
|
||||
# to class-based exceptions.
|
||||
#
|
||||
# Peter Åstrand <astrand@lysator.liu.se> added gnu_getopt().
|
||||
#
|
||||
# TODO for gnu_getopt():
|
||||
#
|
||||
# - GNU getopt_long_only mechanism
|
||||
# - allow the caller to specify ordering
|
||||
# - RETURN_IN_ORDER option
|
||||
# - GNU extension with '-' as first character of option string
|
||||
# - optional arguments, specified by double colons
|
||||
# - a option string with a W followed by semicolon should
|
||||
# treat "-W foo" as "--foo"
|
||||
|
||||
__all__ = ["GetoptError","error","getopt","gnu_getopt"]
|
||||
|
||||
import os
|
||||
try:
|
||||
from gettext import gettext as _
|
||||
except ImportError:
|
||||
# Bootstrapping Python: gettext's dependencies not built yet
|
||||
def _(s): return s
|
||||
|
||||
class GetoptError(Exception):
    """Raised for unrecognized or malformed options.

    Attributes: msg (error text) and opt (the option involved, '' when
    not applicable).
    """

    opt = ''
    msg = ''

    def __init__(self, msg, opt=''):
        self.msg = msg
        self.opt = opt
        super().__init__(msg, opt)

    def __str__(self):
        return self.msg
|
||||
|
||||
error = GetoptError # backward compatibility
|
||||
|
||||
def getopt(args, shortopts, longopts = []):
    """Parse command line options and parameter list.

    args: the argument list without the program name (sys.argv[1:]).
    shortopts: option letters, with ':' after each letter that takes an
    argument (same format as Unix getopt()).
    longopts: list of long option names without the leading '--'; a
    trailing '=' marks options taking an argument.

    Returns (opts, args): a list of (option, value) pairs (option
    prefixed with '-' or '--', value '' when absent) and the remaining
    arguments.  Scanning stops at the first non-option argument.
    Raises GetoptError for unknown or malformed options.
    """
    opts = []
    # isinstance instead of the old `type(longopts) == type("")` check:
    # consistent with gnu_getopt() and also accepts str subclasses.
    if isinstance(longopts, str):
        longopts = [longopts]
    else:
        longopts = list(longopts)
    while args and args[0].startswith('-') and args[0] != '-':
        if args[0] == '--':
            # Explicit end-of-options marker.
            args = args[1:]
            break
        if args[0].startswith('--'):
            opts, args = do_longs(opts, args[0][2:], longopts, args[1:])
        else:
            opts, args = do_shorts(opts, args[0][1:], shortopts, args[1:])

    return opts, args
|
||||
|
||||
def gnu_getopt(args, shortopts, longopts = []):
    """Like getopt(), but with GNU-style scanning: option and non-option
    arguments may be intermixed.

    A leading '+' in *shortopts*, or a set POSIXLY_CORRECT environment
    variable, restores POSIX behavior (stop at the first non-option
    argument).  Returns (opts, prog_args).
    """
    opts = []
    prog_args = []
    longopts = [longopts] if isinstance(longopts, str) else list(longopts)

    # Allow options after non-option arguments?
    if shortopts.startswith('+'):
        shortopts = shortopts[1:]
        all_options_first = True
    else:
        all_options_first = bool(os.environ.get("POSIXLY_CORRECT"))

    while args:
        current = args[0]
        if current == '--':
            prog_args += args[1:]
            break
        if current[:2] == '--':
            opts, args = do_longs(opts, current[2:], longopts, args[1:])
        elif current[:1] == '-' and current != '-':
            opts, args = do_shorts(opts, current[1:], shortopts, args[1:])
        elif all_options_first:
            prog_args += args
            break
        else:
            prog_args.append(current)
            args = args[1:]

    return opts, prog_args
|
||||
|
||||
def do_longs(opts, opt, longopts, args):
    """Consume one long option (*opt* is the text after '--').

    Splits an attached '=value', resolves abbreviations via
    long_has_args(), takes a required argument from *args* when not
    attached, and appends ('--name', value) to *opts*.
    Returns the updated (opts, args).
    """
    if '=' in opt:
        opt, optarg = opt.split('=', 1)
    else:
        optarg = None

    has_arg, opt = long_has_args(opt, longopts)
    if has_arg:
        if optarg is None:
            if not args:
                raise GetoptError(_('option --%s requires argument') % opt, opt)
            optarg, args = args[0], args[1:]
    elif optarg is not None:
        raise GetoptError(_('option --%s must not have an argument') % opt, opt)
    opts.append(('--' + opt, optarg or ''))
    return opts, args
|
||||
|
||||
# Return:
|
||||
# has_arg?
|
||||
# full option name
|
||||
def long_has_args(opt, longopts):
    """Resolve long option *opt* against *longopts*.

    Returns (has_arg, full_option_name).  Exact matches win; otherwise
    *opt* must be a unique prefix of one entry.  Raises GetoptError for
    unknown or ambiguous options.
    """
    matches = [candidate for candidate in longopts if candidate.startswith(opt)]
    if not matches:
        raise GetoptError(_('option --%s not recognized') % opt, opt)
    # Exact match (with or without a required argument)?
    if opt in matches:
        return False, opt
    if opt + '=' in matches:
        return True, opt
    # No exact match, so it had better be a unique prefix.
    if len(matches) > 1:
        # XXX since possibilities contains all valid continuations, might be
        # nice to work them into the error msg
        raise GetoptError(_('option --%s not a unique prefix') % opt, opt)
    full = matches[0]
    if full.endswith('='):
        return True, full[:-1]
    return False, full
|
||||
|
||||
def do_shorts(opts, optstring, shortopts, args):
    """Consume one cluster of short options (*optstring* is the text
    after '-').

    Appends ('-X', value) pairs to *opts*.  An option needing an
    argument takes the rest of the cluster, or failing that the next
    element of *args*.  Returns the updated (opts, args).
    """
    while optstring != '':
        opt, optstring = optstring[0], optstring[1:]
        if short_has_arg(opt, shortopts):
            if optstring == '':
                # Argument not attached: consume the next argv element.
                if not args:
                    raise GetoptError(_('option -%s requires argument') % opt,
                                      opt)
                optstring, args = args[0], args[1:]
            optarg, optstring = optstring, ''
        else:
            optarg = ''
        opts.append(('-' + opt, optarg))
    return opts, args
|
||||
|
||||
def short_has_arg(opt, shortopts):
    """Return True if short option letter *opt* takes an argument,
    i.e. it is followed by ':' in *shortopts*.  Raises GetoptError when
    the option is not declared at all."""
    for idx, letter in enumerate(shortopts):
        if opt == letter != ':':
            return shortopts.startswith(':', idx + 1)
    raise GetoptError(_('option -%s not recognized') % opt, opt)
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
print(getopt(sys.argv[1:], "a:b", ["alpha=", "beta"]))
|
|
@ -0,0 +1,14 @@
|
|||
import ffilib
|
||||
|
||||
libc = ffilib.libc()

# libc bindings: char *gettext(const char *),
# char *ngettext(const char *, const char *, unsigned long).
gettext_ = libc.func("s", "gettext", "s")
ngettext_ = libc.func("s", "ngettext", "ssL")


def gettext(message):
    """Return the translation of *message* via libc gettext(3)."""
    return gettext_(message)


def ngettext(singular, plural, n):
    """Return the singular/plural translation appropriate for count *n*
    via libc ngettext(3)."""
    return ngettext_(singular, plural, n)
|
|
@ -0,0 +1,95 @@
|
|||
"""Filename globbing utility."""
|
||||
|
||||
import os
|
||||
import os.path
|
||||
import re
|
||||
import fnmatch
|
||||
|
||||
__all__ = ["glob", "iglob"]
|
||||
|
||||
def glob(pathname):
    """Return a list of paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la fnmatch.
    Unlike fnmatch, filenames starting with a dot are special cases that
    are not matched by '*' and '?' patterns.
    """
    return list(iglob(pathname))
|
||||
|
||||
def iglob(pathname):
    """Return an iterator which yields the paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. However, unlike fnmatch, filenames starting with a
    dot are special cases that are not matched by '*' and '?'
    patterns.

    """
    if not has_magic(pathname):
        # Literal path: yield it only if it exists (lexists: broken
        # symlinks count too).
        if os.path.lexists(pathname):
            yield pathname
        return
    dirname, basename = os.path.split(pathname)
    if not dirname:
        # Pattern relative to the current directory.
        for name in glob1(None, basename):
            yield name
        return
    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path. Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if dirname != pathname and has_magic(dirname):
        # Recurse to expand wildcards in the directory part.
        dirs = iglob(dirname)
    else:
        dirs = [dirname]
    if has_magic(basename):
        glob_in_dir = glob1
    else:
        glob_in_dir = glob0
    for dirname in dirs:
        for name in glob_in_dir(dirname, basename):
            yield os.path.join(dirname, name)
|
||||
|
||||
# These 2 helper functions non-recursively glob inside a literal directory.
|
||||
# They return a list of basenames. `glob1` accepts a pattern while `glob0`
|
||||
# takes a literal basename (so it only has to check for its existence).
|
||||
|
||||
def glob1(dirname, pattern):
    """Return basenames in *dirname* matching wildcard *pattern*."""
    if not dirname:
        # Empty dirname means the current directory, in a type (str vs
        # bytes) matching the pattern.
        if isinstance(pattern, bytes):
            dirname = bytes(os.curdir, 'ASCII')
        else:
            dirname = os.curdir
    try:
        names = os.listdir(dirname)
    except os.error:
        # Unreadable/nonexistent directory: no matches.
        return []
    if not _ishidden(pattern):
        # Hidden entries are only matched by patterns that are
        # themselves hidden (start with a dot).
        names = [x for x in names if not _ishidden(x)]
    return fnmatch.filter(names, pattern)
|
||||
|
||||
def glob0(dirname, basename):
    """Return [basename] when the literal entry exists in *dirname*,
    else [].  An empty basename (pattern ended with a separator, e.g.
    'q*x/') matches only when *dirname* is a directory."""
    if basename:
        if os.path.lexists(os.path.join(dirname, basename)):
            return [basename]
    elif os.path.isdir(dirname):
        return [basename]
    return []
|
||||
|
||||
|
||||
# Any of '*', '?' or '[' marks a wildcard pattern.
magic_check = re.compile('[*?[]')
magic_check_bytes = re.compile(b'[*?[]')


def has_magic(s):
    """Return True if *s* (str or bytes) contains a glob wildcard."""
    pattern = magic_check_bytes if isinstance(s, bytes) else magic_check
    return pattern.search(s) is not None
|
||||
|
||||
def _ishidden(path):
|
||||
return path[0] in ('.', b'.'[0])
|
|
@ -0,0 +1,28 @@
|
|||
#import zlib
|
||||
import uzlib as zlib
|
||||
|
||||
# gzip FLG header bits (RFC 1952).
FTEXT = 1
FHCRC = 2
FEXTRA = 4
FNAME = 8
FCOMMENT = 16


def decompress(data):
    """Decompress a complete gzip (RFC 1952) byte stream; return the payload.

    Parses the 10-byte header plus the optional EXTRA / NAME / COMMENT /
    HCRC fields, then hands the raw deflate stream to zlib with
    wbits=-15.  The CRC32/ISIZE trailer is not verified.

    Bug fix: the FEXTRA skip previously computed
    ``data[11] << 8 + data[10] + 2`` — ``+`` binds tighter than ``<<``,
    so the shift amount was the sum and any EXTRA field was skipped
    incorrectly.
    """
    assert data[0] == 0x1f and data[1] == 0x8b   # gzip magic
    assert data[2] == 8                          # CM: deflate
    flg = data[3]
    assert flg & 0xe0 == 0                       # reserved bits must be zero
    i = 10                                       # fixed-size header
    if flg & FEXTRA:
        # XLEN is a little-endian u16 at offsets 10-11; skip the two
        # length bytes plus XLEN payload bytes.
        i += ((data[11] << 8) | data[10]) + 2
    if flg & FNAME:
        # Zero-terminated original file name.
        while data[i]:
            i += 1
        i += 1
    if flg & FCOMMENT:
        # Zero-terminated comment.
        while data[i]:
            i += 1
        i += 1
    if flg & FHCRC:
        i += 2                                   # header CRC16
    return zlib.decompress(memoryview(data)[i:], -15)
|
|
@ -0,0 +1,22 @@
|
|||
try:
|
||||
import uhashlib
|
||||
except ImportError:
|
||||
uhashlib = None
|
||||
|
||||
def init():
    """Populate this module's globals with hash constructors.

    For each algorithm name, prefer the native uhashlib implementation;
    otherwise import the pure-Python sibling module _<name> and use its
    class of the same name.
    """
    for i in ("sha1", "sha224", "sha256", "sha384", "sha512"):
        # getattr on uhashlib=None (import failed) also yields None,
        # forcing the pure-Python fallback below.
        c = getattr(uhashlib, i, None)
        if not c:
            c = __import__("_" + i, None, None, (), 1)
            c = getattr(c, i)
        globals()[i] = c
|
||||
|
||||
init()
|
||||
|
||||
|
||||
def new(algo, data=b""):
    """Return a new hash object for algorithm name *algo*, optionally
    primed with *data*.

    Raises ValueError for unknown algorithm names.  The constructor call
    now sits outside the ``try`` so that a KeyError raised *inside* the
    hash implementation is no longer masked as ValueError.
    """
    try:
        c = globals()[algo]
    except KeyError:
        raise ValueError(algo)
    return c(data)
|
|
@ -0,0 +1 @@
|
|||
from ._sha256 import sha224
|
|
@ -0,0 +1,264 @@
|
|||
SHA_BLOCKSIZE = 64   # bytes per SHA-256 input block
SHA_DIGESTSIZE = 32  # bytes in a SHA-256 digest


def new_shaobject():
    """Return a zeroed SHA-256 state dict: digest words, 64-bit bit
    counter (count_lo/count_hi), pending-block buffer, buffered byte
    count and output size."""
    state = {}
    state['digest'] = [0] * 8
    state['count_lo'] = 0
    state['count_hi'] = 0
    state['data'] = [0] * SHA_BLOCKSIZE
    state['local'] = 0
    state['digestsize'] = 0
    return state
|
||||
|
||||
def ROR(x, y):
    """Rotate 32-bit value x right by y bits."""
    return (((x & 0xffffffff) >> (y & 31)) | (x << (32 - (y & 31)))) & 0xffffffff

def Ch(x, y, z):
    """SHA-2 'choose': bits of y where x is 1, bits of z where x is 0."""
    return z ^ (x & (y ^ z))

def Maj(x, y, z):
    """SHA-2 bitwise majority of the three inputs."""
    return ((x | y) & z) | (x & y)

S = ROR  # rotate right

def R(x, n):
    """Zero-fill right shift of 32-bit x by n."""
    return (x & 0xffffffff) >> n

def Sigma0(x):
    return S(x, 2) ^ S(x, 13) ^ S(x, 22)

def Sigma1(x):
    return S(x, 6) ^ S(x, 11) ^ S(x, 25)

def Gamma0(x):
    return S(x, 7) ^ S(x, 18) ^ R(x, 3)

def Gamma1(x):
    return S(x, 17) ^ S(x, 19) ^ R(x, 10)

# FIPS 180-4 SHA-256 round constants (fractional parts of the cube
# roots of the first 64 primes).
_K = [
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
]


def sha_transform(sha_info):
    """Run the SHA-256 compression function on the 64-byte block in
    sha_info['data'], updating sha_info['digest'] in place.

    Replaces the previous 64 hand-unrolled RND() calls with the
    standard round loop over the _K table; output is identical.
    """
    d = sha_info['data']
    # Message schedule: 16 big-endian 32-bit words, expanded to 64.
    W = [(d[4*i] << 24) + (d[4*i+1] << 16) + (d[4*i+2] << 8) + d[4*i+3]
         for i in range(16)]
    for i in range(16, 64):
        W.append((Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16]) & 0xffffffff)

    a, b, c, dd, e, f, g, h = sha_info['digest']
    for i in range(64):
        t0 = (h + Sigma1(e) + Ch(e, f, g) + _K[i] + W[i]) & 0xffffffff
        t1 = (Sigma0(a) + Maj(a, b, c)) & 0xffffffff
        a, b, c, dd, e, f, g, h = (
            (t0 + t1) & 0xffffffff, a, b, c, (dd + t0) & 0xffffffff, e, f, g)

    # Add this block's result into the running digest (mod 2**32).
    sha_info['digest'] = [
        (x + y) & 0xffffffff
        for x, y in zip(sha_info['digest'], (a, b, c, dd, e, f, g, h))]
|
||||
|
||||
def sha_init():
    """Return a fresh SHA-256 state: FIPS 180-4 initial hash words
    H0..H7 and a 32-byte output size."""
    sha_info = new_shaobject()
    sha_info['digest'] = [0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19]
    sha_info['count_lo'] = 0
    sha_info['count_hi'] = 0
    sha_info['local'] = 0
    sha_info['digestsize'] = 32
    return sha_info
||||
|
||||
def sha224_init():
    """Return a fresh SHA-224 state: FIPS 180-4 SHA-224 initial hash
    words and a 28-byte output size (same machinery as SHA-256)."""
    sha_info = new_shaobject()
    sha_info['digest'] = [0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4]
    sha_info['count_lo'] = 0
    sha_info['count_hi'] = 0
    sha_info['local'] = 0
    sha_info['digestsize'] = 28
    return sha_info
|
||||
|
||||
def getbuf(s):
    """Coerce input to bytes: str is ASCII-encoded, anything else is
    passed through bytes() (bytes, bytearray, buffer-like)."""
    return s.encode('ascii') if isinstance(s, str) else bytes(s)
|
||||
|
||||
def sha_update(sha_info, buffer):
    """Absorb *buffer* (a bytes-like object) into the hashing state.

    Maintains a 64-bit processed-bit counter split across
    count_lo/count_hi and buffers partial blocks in sha_info['data']
    until a full SHA_BLOCKSIZE block is available for sha_transform().
    """
    if isinstance(buffer, str):
        raise TypeError("Unicode strings must be encoded before hashing")
    count = len(buffer)
    buffer_idx = 0
    # Low 32 bits of the bit count; carry overflow into the high word.
    clo = (sha_info['count_lo'] + (count << 3)) & 0xffffffff
    if clo < sha_info['count_lo']:
        sha_info['count_hi'] += 1
    sha_info['count_lo'] = clo

    sha_info['count_hi'] += (count >> 29)

    if sha_info['local']:
        # Top up a previously buffered partial block first.
        i = SHA_BLOCKSIZE - sha_info['local']
        if i > count:
            i = count

        # copy buffer
        for x in enumerate(buffer[buffer_idx:buffer_idx+i]):
            sha_info['data'][sha_info['local']+x[0]] = x[1]

        count -= i
        buffer_idx += i

        sha_info['local'] += i
        if sha_info['local'] == SHA_BLOCKSIZE:
            sha_transform(sha_info)
            sha_info['local'] = 0
        else:
            # Still short of a full block; wait for more data.
            return

    # Process as many complete blocks as remain.
    while count >= SHA_BLOCKSIZE:
        # copy buffer
        sha_info['data'] = list(buffer[buffer_idx:buffer_idx + SHA_BLOCKSIZE])
        count -= SHA_BLOCKSIZE
        buffer_idx += SHA_BLOCKSIZE
        sha_transform(sha_info)

    # copy buffer
    # Stash the trailing partial block for the next update()/final().
    pos = sha_info['local']
    sha_info['data'][pos:pos+count] = list(buffer[buffer_idx:buffer_idx + count])
    sha_info['local'] = count
|
||||
|
||||
def sha_final(sha_info):
    """Apply SHA padding and return the full 32-byte digest as bytes.

    Mutates *sha_info*; callers pass a copy (see sha256.digest) so the
    running state stays usable for further updates.
    """
    lo_bit_count = sha_info['count_lo']
    hi_bit_count = sha_info['count_hi']
    count = (lo_bit_count >> 3) & 0x3f
    # Mandatory 0x80 pad byte directly after the message.
    sha_info['data'][count] = 0x80;
    count += 1
    if count > SHA_BLOCKSIZE - 8:
        # No room left for the 8-byte length field: pad out this block,
        # transform it, and start a fresh all-zero block.
        # zero the bytes in data after the count
        sha_info['data'] = sha_info['data'][:count] + ([0] * (SHA_BLOCKSIZE - count))
        sha_transform(sha_info)
        # zero bytes in data
        sha_info['data'] = [0] * SHA_BLOCKSIZE
    else:
        sha_info['data'] = sha_info['data'][:count] + ([0] * (SHA_BLOCKSIZE - count))

    # Big-endian 64-bit message length in bits fills the last 8 bytes.
    sha_info['data'][56] = (hi_bit_count >> 24) & 0xff
    sha_info['data'][57] = (hi_bit_count >> 16) & 0xff
    sha_info['data'][58] = (hi_bit_count >> 8) & 0xff
    sha_info['data'][59] = (hi_bit_count >> 0) & 0xff
    sha_info['data'][60] = (lo_bit_count >> 24) & 0xff
    sha_info['data'][61] = (lo_bit_count >> 16) & 0xff
    sha_info['data'][62] = (lo_bit_count >> 8) & 0xff
    sha_info['data'][63] = (lo_bit_count >> 0) & 0xff

    sha_transform(sha_info)

    # Serialize the eight 32-bit digest words big-endian.
    dig = []
    for i in sha_info['digest']:
        dig.extend([ ((i>>24) & 0xff), ((i>>16) & 0xff), ((i>>8) & 0xff), (i & 0xff) ])
    return bytes(dig)
|
||||
|
||||
class sha256(object):
    """Pure-Python SHA-256 hash object mirroring the hashlib interface."""

    digest_size = digestsize = SHA_DIGESTSIZE
    block_size = SHA_BLOCKSIZE

    def __init__(self, s=None):
        """Create a hash object, optionally primed with initial data."""
        self._sha = sha_init()
        if s:
            sha_update(self._sha, getbuf(s))

    def update(self, s):
        """Feed more data into the hash; str input must be ASCII."""
        sha_update(self._sha, getbuf(s))

    def digest(self):
        """Return the digest as bytes.  Finalizes a copy of the state so
        further update() calls remain valid."""
        return sha_final(self._sha.copy())[:self._sha['digestsize']]

    def hexdigest(self):
        """Return the digest as a lowercase hex string."""
        return ''.join('%.2x' % byte for byte in self.digest())

    def copy(self):
        """Return an independent clone of the current hash state."""
        clone = sha256()
        clone._sha = self._sha.copy()
        return clone
|
||||
|
||||
class sha224(sha256):
    """SHA-224: the SHA-256 machinery with different initial state and a
    truncated 28-byte digest."""

    digest_size = digestsize = 28

    def __init__(self, s=None):
        """Create a SHA-224 hash object, optionally primed with data."""
        self._sha = sha224_init()
        if s:
            sha_update(self._sha, getbuf(s))

    def copy(self):
        """Return an independent clone of the current hash state."""
        clone = sha224()
        clone._sha = self._sha.copy()
        return clone
|
||||
|
||||
def test():
    """Self-test against known SHA-256 vectors: empty input, a short
    ASCII string, a longer repeated string, and incremental update()."""
    a_str = "just a test string"

    assert b"\xe3\xb0\xc4B\x98\xfc\x1c\x14\x9a\xfb\xf4\xc8\x99o\xb9$'\xaeA\xe4d\x9b\x93L\xa4\x95\x99\x1bxR\xb8U" == sha256().digest()
    assert 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' == sha256().hexdigest()
    assert 'd7b553c6f09ac85d142415f857c5310f3bbbe7cdd787cce4b985acedd585266f' == sha256(a_str).hexdigest()
    assert '8113ebf33c97daa9998762aacafe750c7cefc2b2f173c90c59663a57fe626f21' == sha256(a_str*7).hexdigest()

    # Incremental hashing must equal hashing the concatenation.
    s = sha256(a_str)
    s.update(a_str)
    assert '03d9963e05a094593190b6fc794cb1a3e1ac7d7883f0b5855268afeccc70d461' == s.hexdigest()
|
||||
|
||||
if __name__ == "__main__":
|
||||
test()
|
||||
|
||||
|
|
@ -0,0 +1 @@
|
|||
from ._sha512 import sha384
|
|
@ -0,0 +1,290 @@
|
|||
"""
|
||||
This code was Ported from CPython's sha512module.c
|
||||
"""
|
||||
|
||||
SHA_BLOCKSIZE = 128
|
||||
SHA_DIGESTSIZE = 64
|
||||
|
||||
|
||||
def new_shaobject():
    """Return a fresh, zeroed SHA-512 state dictionary."""
    state = {}
    state['digest'] = [0] * 8                # eight 64-bit working words
    state['count_lo'] = 0                    # low 32 bits of the bit counter
    state['count_hi'] = 0                    # high 32 bits of the bit counter
    state['data'] = [0] * SHA_BLOCKSIZE      # pending message block, one int per byte
    state['local'] = 0                       # number of bytes buffered in 'data'
    state['digestsize'] = 0                  # set by the *_init() helpers
    return state
|
||||
|
||||
_MASK64 = 0xffffffffffffffff  # all SHA-512 arithmetic is modulo 2**64


def ROR64(x, y):
    """Rotate the 64-bit value *x* right by *y* bits (*y* taken mod 64)."""
    y &= 63
    return (((x & _MASK64) >> y) | (x << (64 - y))) & _MASK64


def Ch(x, y, z):
    """SHA-2 'choose': bits of *y* where *x* is 1, bits of *z* where *x* is 0."""
    return z ^ (x & (y ^ z))


def Maj(x, y, z):
    """SHA-2 'majority' of the three inputs, computed bit by bit."""
    return ((x | y) & z) | (x & y)


def S(x, n):
    """Right-rotation alias used by the Sigma/Gamma round functions."""
    return ROR64(x, n)


def R(x, n):
    """Logical (zero-fill) right shift of the 64-bit value *x* by *n*."""
    return (x & _MASK64) >> n


def Sigma0(x):
    """Big sigma-0 of FIPS 180-4: ROTR 28 ^ ROTR 34 ^ ROTR 39."""
    return S(x, 28) ^ S(x, 34) ^ S(x, 39)


def Sigma1(x):
    """Big sigma-1 of FIPS 180-4: ROTR 14 ^ ROTR 18 ^ ROTR 41."""
    return S(x, 14) ^ S(x, 18) ^ S(x, 41)


def Gamma0(x):
    """Small sigma-0 of FIPS 180-4: ROTR 1 ^ ROTR 8 ^ SHR 7."""
    return S(x, 1) ^ S(x, 8) ^ R(x, 7)


def Gamma1(x):
    """Small sigma-1 of FIPS 180-4: ROTR 19 ^ ROTR 61 ^ SHR 6."""
    return S(x, 19) ^ S(x, 61) ^ R(x, 6)
|
||||
|
||||
def sha_transform(sha_info):
    """Run the SHA-512 compression function on the buffered 128-byte block.

    Expands sha_info['data'] into the 80-entry message schedule W, mixes it
    into a working copy of the digest through 80 rounds, then adds the result
    back into sha_info['digest'].  All arithmetic is modulo 2**64.
    """
    W = []

    # Message schedule: the first 16 words come straight from the block
    # (big-endian 64-bit); the remaining 64 are derived per FIPS 180-4.
    d = sha_info['data']
    for i in range(0,16):
        W.append( (d[8*i]<<56) + (d[8*i+1]<<48) + (d[8*i+2]<<40) + (d[8*i+3]<<32) + (d[8*i+4]<<24) + (d[8*i+5]<<16) + (d[8*i+6]<<8) + d[8*i+7])

    for i in range(16,80):
        W.append( (Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16]) & 0xffffffffffffffff )

    # Working variables a..h live in ss[0..7]; the rotation of their roles
    # is unrolled below instead of shuffling the list every round.
    ss = sha_info['digest'][:]

    def RND(a,b,c,d,e,f,g,h,i,ki):
        # One SHA-512 round; returns the updated (d, h) pair for round i
        # with round constant ki.
        t0 = (h + Sigma1(e) + Ch(e, f, g) + ki + W[i]) & 0xffffffffffffffff
        t1 = (Sigma0(a) + Maj(a, b, c)) & 0xffffffffffffffff
        d = (d + t0) & 0xffffffffffffffff
        h = (t0 + t1) & 0xffffffffffffffff
        return d & 0xffffffffffffffff, h & 0xffffffffffffffff

    # 80 unrolled rounds; the ki literals are the SHA-512 round constants
    # K[0..79] from FIPS 180-4.
    ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],0,0x428a2f98d728ae22)
    ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],1,0x7137449123ef65cd)
    ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],2,0xb5c0fbcfec4d3b2f)
    ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],3,0xe9b5dba58189dbbc)
    ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],4,0x3956c25bf348b538)
    ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],5,0x59f111f1b605d019)
    ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],6,0x923f82a4af194f9b)
    ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],7,0xab1c5ed5da6d8118)
    ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],8,0xd807aa98a3030242)
    ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],9,0x12835b0145706fbe)
    ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],10,0x243185be4ee4b28c)
    ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],11,0x550c7dc3d5ffb4e2)
    ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],12,0x72be5d74f27b896f)
    ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],13,0x80deb1fe3b1696b1)
    ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],14,0x9bdc06a725c71235)
    ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],15,0xc19bf174cf692694)
    ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],16,0xe49b69c19ef14ad2)
    ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],17,0xefbe4786384f25e3)
    ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],18,0x0fc19dc68b8cd5b5)
    ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],19,0x240ca1cc77ac9c65)
    ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],20,0x2de92c6f592b0275)
    ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],21,0x4a7484aa6ea6e483)
    ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],22,0x5cb0a9dcbd41fbd4)
    ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],23,0x76f988da831153b5)
    ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],24,0x983e5152ee66dfab)
    ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],25,0xa831c66d2db43210)
    ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],26,0xb00327c898fb213f)
    ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],27,0xbf597fc7beef0ee4)
    ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],28,0xc6e00bf33da88fc2)
    ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],29,0xd5a79147930aa725)
    ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],30,0x06ca6351e003826f)
    ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],31,0x142929670a0e6e70)
    ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],32,0x27b70a8546d22ffc)
    ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],33,0x2e1b21385c26c926)
    ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],34,0x4d2c6dfc5ac42aed)
    ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],35,0x53380d139d95b3df)
    ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],36,0x650a73548baf63de)
    ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],37,0x766a0abb3c77b2a8)
    ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],38,0x81c2c92e47edaee6)
    ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],39,0x92722c851482353b)
    ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],40,0xa2bfe8a14cf10364)
    ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],41,0xa81a664bbc423001)
    ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],42,0xc24b8b70d0f89791)
    ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],43,0xc76c51a30654be30)
    ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],44,0xd192e819d6ef5218)
    ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],45,0xd69906245565a910)
    ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],46,0xf40e35855771202a)
    ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],47,0x106aa07032bbd1b8)
    ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],48,0x19a4c116b8d2d0c8)
    ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],49,0x1e376c085141ab53)
    ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],50,0x2748774cdf8eeb99)
    ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],51,0x34b0bcb5e19b48a8)
    ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],52,0x391c0cb3c5c95a63)
    ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],53,0x4ed8aa4ae3418acb)
    ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],54,0x5b9cca4f7763e373)
    ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],55,0x682e6ff3d6b2b8a3)
    ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],56,0x748f82ee5defb2fc)
    ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],57,0x78a5636f43172f60)
    ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],58,0x84c87814a1f0ab72)
    ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],59,0x8cc702081a6439ec)
    ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],60,0x90befffa23631e28)
    ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],61,0xa4506cebde82bde9)
    ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],62,0xbef9a3f7b2c67915)
    ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],63,0xc67178f2e372532b)
    ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],64,0xca273eceea26619c)
    ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],65,0xd186b8c721c0c207)
    ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],66,0xeada7dd6cde0eb1e)
    ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],67,0xf57d4f7fee6ed178)
    ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],68,0x06f067aa72176fba)
    ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],69,0x0a637dc5a2c898a6)
    ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],70,0x113f9804bef90dae)
    ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],71,0x1b710b35131c471b)
    ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],72,0x28db77f523047d84)
    ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],73,0x32caab7b40c72493)
    ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],74,0x3c9ebe0a15c9bebc)
    ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],75,0x431d67c49c100d4c)
    ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],76,0x4cc5d4becb3e42b6)
    ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],77,0x597f299cfc657e2a)
    ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],78,0x5fcb6fab3ad6faec)
    ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],79,0x6c44198c4a475817)

    # Feed-forward: add the working variables back into the running digest.
    dig = []
    for i, x in enumerate(sha_info['digest']):
        dig.append( (x + ss[i]) & 0xffffffffffffffff )
    sha_info['digest'] = dig
|
||||
|
||||
def sha_init():
    """Create a SHA-512 state seeded with the standard initial hash values."""
    state = new_shaobject()
    # Initial H values for SHA-512 from FIPS 180-4.
    state['digest'] = [
        0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
        0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
        0x510e527fade682d1, 0x9b05688c2b3e6c1f,
        0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
    ]
    state['count_lo'] = 0
    state['count_hi'] = 0
    state['local'] = 0
    state['digestsize'] = 64
    return state
|
||||
|
||||
def sha384_init():
    """Create a SHA-384 state: SHA-512 machinery, different IV, 48-byte digest."""
    state = new_shaobject()
    # Initial H values for SHA-384 from FIPS 180-4.
    state['digest'] = [
        0xcbbb9d5dc1059ed8, 0x629a292a367cd507,
        0x9159015a3070dd17, 0x152fecd8f70e5939,
        0x67332667ffc00b31, 0x8eb44a8768581511,
        0xdb0c2e0d64f98fa7, 0x47b5481dbefa4fa4,
    ]
    state['count_lo'] = 0
    state['count_hi'] = 0
    state['local'] = 0
    state['digestsize'] = 48
    return state
|
||||
|
||||
def getbuf(s):
    """Coerce *s* to bytes: str is ASCII-encoded, anything else goes through bytes()."""
    return s.encode('ascii') if isinstance(s, str) else bytes(s)
|
||||
|
||||
def sha_update(sha_info, buffer):
    """Absorb *buffer* (a bytes-like object) into the hash state.

    Data accumulates in the 128-byte block buffer 'data'; each time a full
    block is available it is consumed by sha_transform().
    """
    if isinstance(buffer, str):
        raise TypeError("Unicode strings must be encoded before hashing")
    count = len(buffer)
    buffer_idx = 0
    # Track the processed length in *bits*, split across two 32-bit words
    # (count_hi:count_lo) with explicit carry.  NOTE(review): this caps the
    # total message length at 2**64 bits, not the 2**128 SHA-512 allows.
    clo = (sha_info['count_lo'] + (count << 3)) & 0xffffffff
    if clo < sha_info['count_lo']:
        sha_info['count_hi'] += 1
    sha_info['count_lo'] = clo

    sha_info['count_hi'] += (count >> 29)

    if sha_info['local']:
        # A partial block is already buffered: top it up first.
        i = SHA_BLOCKSIZE - sha_info['local']
        if i > count:
            i = count

        # copy buffer
        for x in enumerate(buffer[buffer_idx:buffer_idx+i]):
            sha_info['data'][sha_info['local']+x[0]] = x[1]

        count -= i
        buffer_idx += i

        sha_info['local'] += i
        if sha_info['local'] == SHA_BLOCKSIZE:
            # The buffer filled up exactly: consume it, then continue below.
            sha_transform(sha_info)
            sha_info['local'] = 0
        else:
            # Still less than one full block pending -- nothing more to do.
            return

    # Consume complete blocks straight from the input.
    while count >= SHA_BLOCKSIZE:
        # copy buffer
        sha_info['data'] = list(buffer[buffer_idx:buffer_idx + SHA_BLOCKSIZE])
        count -= SHA_BLOCKSIZE
        buffer_idx += SHA_BLOCKSIZE
        sha_transform(sha_info)

    # copy buffer
    # Stash the trailing partial block for the next update()/final() call.
    pos = sha_info['local']
    sha_info['data'][pos:pos+count] = list(buffer[buffer_idx:buffer_idx + count])
    sha_info['local'] = count
|
||||
|
||||
def sha_final(sha_info):
    """Apply the final padding and return the full 64-byte digest as bytes.

    Mutates sha_info; callers hash a copy of the state so that update()
    can continue afterwards.
    """
    lo_bit_count = sha_info['count_lo']
    hi_bit_count = sha_info['count_hi']
    # Number of message bytes already sitting in the current block
    # (0x7f mask works because SHA_BLOCKSIZE is 128).
    count = (lo_bit_count >> 3) & 0x7f
    sha_info['data'][count] = 0x80;
    count += 1
    if count > SHA_BLOCKSIZE - 16:
        # No room left for the 16-byte length field: pad this block out,
        # transform it, and start a fresh all-zero block.
        # zero the bytes in data after the count
        sha_info['data'] = sha_info['data'][:count] + ([0] * (SHA_BLOCKSIZE - count))
        sha_transform(sha_info)
        # zero bytes in data
        sha_info['data'] = [0] * SHA_BLOCKSIZE
    else:
        sha_info['data'] = sha_info['data'][:count] + ([0] * (SHA_BLOCKSIZE - count))

    # Length field: bytes 112..119 stay zero (only a 64-bit bit count is
    # tracked), bytes 120..127 hold count_hi:count_lo big-endian.
    sha_info['data'][112] = 0;
    sha_info['data'][113] = 0;
    sha_info['data'][114] = 0;
    sha_info['data'][115] = 0;
    sha_info['data'][116] = 0;
    sha_info['data'][117] = 0;
    sha_info['data'][118] = 0;
    sha_info['data'][119] = 0;

    sha_info['data'][120] = (hi_bit_count >> 24) & 0xff
    sha_info['data'][121] = (hi_bit_count >> 16) & 0xff
    sha_info['data'][122] = (hi_bit_count >> 8) & 0xff
    sha_info['data'][123] = (hi_bit_count >> 0) & 0xff
    sha_info['data'][124] = (lo_bit_count >> 24) & 0xff
    sha_info['data'][125] = (lo_bit_count >> 16) & 0xff
    sha_info['data'][126] = (lo_bit_count >> 8) & 0xff
    sha_info['data'][127] = (lo_bit_count >> 0) & 0xff

    sha_transform(sha_info)

    # Serialize the eight 64-bit digest words big-endian.
    dig = []
    for i in sha_info['digest']:
        dig.extend([ ((i>>56) & 0xff), ((i>>48) & 0xff), ((i>>40) & 0xff), ((i>>32) & 0xff), ((i>>24) & 0xff), ((i>>16) & 0xff), ((i>>8) & 0xff), (i & 0xff) ])
    return bytes(dig)
|
||||
|
||||
class sha512(object):
    """Pure-Python SHA-512 hash object mirroring hashlib's interface."""

    digest_size = digestsize = SHA_DIGESTSIZE
    block_size = SHA_BLOCKSIZE

    def __init__(self, s=None):
        # Fresh internal state; optionally absorb initial data.
        self._sha = sha_init()
        if s:
            sha_update(self._sha, getbuf(s))

    def update(self, s):
        """Absorb more data into the running hash."""
        sha_update(self._sha, getbuf(s))

    def digest(self):
        """Return the digest as bytes; the state is copied so hashing may continue."""
        return sha_final(self._sha.copy())[:self._sha['digestsize']]

    def hexdigest(self):
        """Return the digest as a lowercase hex string."""
        return ''.join('%.2x' % byte for byte in self.digest())

    def copy(self):
        """Return an independent clone of the current hash state."""
        clone = sha512()
        clone._sha = self._sha.copy()
        return clone
|
||||
|
||||
class sha384(sha512):
    """SHA-384: same machinery as sha512, with a truncated 48-byte digest."""

    digest_size = digestsize = 48

    def __init__(self, s=None):
        # SHA-384 differs from SHA-512 only in its initial values and size.
        self._sha = sha384_init()
        if s:
            sha_update(self._sha, getbuf(s))

    def copy(self):
        """Return an independent clone of the current hash state."""
        clone = sha384()
        clone._sha = self._sha.copy()
        return clone
|
||||
|
||||
def test():
    """Self-test against known SHA-512 answers; raises AssertionError on failure."""
    sample = "just a test string"

    empty = sha512()
    assert empty.digest() == b"\xcf\x83\xe15~\xef\xb8\xbd\xf1T(P\xd6m\x80\x07\xd6 \xe4\x05\x0bW\x15\xdc\x83\xf4\xa9!\xd3l\xe9\xceG\xd0\xd1<]\x85\xf2\xb0\xff\x83\x18\xd2\x87~\xec/c\xb91\xbdGAz\x81\xa582z\xf9'\xda>"
    assert empty.hexdigest() == 'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'
    assert sha512(sample).hexdigest() == '68be4c6664af867dd1d01c8d77e963d87d77b702400c8fabae355a41b8927a5a5533a7f1c28509bbd65c5f3ac716f33be271fbda0ca018b71a84708c9fae8a53'
    assert sha512(sample * 7).hexdigest() == '3233acdbfcfff9bff9fc72401d31dbffa62bd24e9ec846f0578d647da73258d9f0879f7fde01fe2cc6516af3f343807fdef79e23d696c923d79931db46bf1819'

    # Incremental hashing must match hashing the concatenation.
    running = sha512(sample)
    running.update(sample)
    assert running.hexdigest() == '341aeb668730bbb48127d5531115f3c39d12cb9586a6ca770898398aff2411087cfe0b570689adf328cddeb1f00803acce6737a19f310b53bbdb0320828f75bb'


if __name__ == "__main__":
    test()
|
|
@ -0,0 +1,480 @@
|
|||
"""Heap queue algorithm (a.k.a. priority queue).
|
||||
|
||||
Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for
|
||||
all k, counting elements from 0. For the sake of comparison,
|
||||
non-existing elements are considered to be infinite. The interesting
|
||||
property of a heap is that a[0] is always its smallest element.
|
||||
|
||||
Usage:
|
||||
|
||||
heap = [] # creates an empty heap
|
||||
heappush(heap, item) # pushes a new item on the heap
|
||||
item = heappop(heap) # pops the smallest item from the heap
|
||||
item = heap[0] # smallest item on the heap without popping it
|
||||
heapify(x) # transforms list into a heap, in-place, in linear time
|
||||
item = heapreplace(heap, item) # pops and returns smallest item, and adds
|
||||
# new item; the heap size is unchanged
|
||||
|
||||
Our API differs from textbook heap algorithms as follows:
|
||||
|
||||
- We use 0-based indexing. This makes the relationship between the
|
||||
index for a node and the indexes for its children slightly less
|
||||
obvious, but is more suitable since Python uses 0-based indexing.
|
||||
|
||||
- Our heappop() method returns the smallest item, not the largest.
|
||||
|
||||
These two make it possible to view the heap as a regular Python list
|
||||
without surprises: heap[0] is the smallest item, and heap.sort()
|
||||
maintains the heap invariant!
|
||||
"""
|
||||
|
||||
# Original code by Kevin O'Connor, augmented by Tim Peters and Raymond Hettinger
|
||||
|
||||
"""Heap queues
|
||||
|
||||
[explanation by François Pinard]
|
||||
|
||||
Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for
|
||||
all k, counting elements from 0. For the sake of comparison,
|
||||
non-existing elements are considered to be infinite. The interesting
|
||||
property of a heap is that a[0] is always its smallest element.
|
||||
|
||||
The strange invariant above is meant to be an efficient memory
|
||||
representation for a tournament. The numbers below are `k', not a[k]:
|
||||
|
||||
0
|
||||
|
||||
1 2
|
||||
|
||||
3 4 5 6
|
||||
|
||||
7 8 9 10 11 12 13 14
|
||||
|
||||
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
|
||||
|
||||
|
||||
In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In
|
||||
an usual binary tournament we see in sports, each cell is the winner
|
||||
over the two cells it tops, and we can trace the winner down the tree
|
||||
to see all opponents s/he had. However, in many computer applications
|
||||
of such tournaments, we do not need to trace the history of a winner.
|
||||
To be more memory efficient, when a winner is promoted, we try to
|
||||
replace it by something else at a lower level, and the rule becomes
|
||||
that a cell and the two cells it tops contain three different items,
|
||||
but the top cell "wins" over the two topped cells.
|
||||
|
||||
If this heap invariant is protected at all time, index 0 is clearly
|
||||
the overall winner. The simplest algorithmic way to remove it and
|
||||
find the "next" winner is to move some loser (let's say cell 30 in the
|
||||
diagram above) into the 0 position, and then percolate this new 0 down
|
||||
the tree, exchanging values, until the invariant is re-established.
|
||||
This is clearly logarithmic on the total number of items in the tree.
|
||||
By iterating over all items, you get an O(n ln n) sort.
|
||||
|
||||
A nice feature of this sort is that you can efficiently insert new
|
||||
items while the sort is going on, provided that the inserted items are
|
||||
not "better" than the last 0'th element you extracted. This is
|
||||
especially useful in simulation contexts, where the tree holds all
|
||||
incoming events, and the "win" condition means the smallest scheduled
|
||||
time. When an event schedule other events for execution, they are
|
||||
scheduled into the future, so they can easily go into the heap. So, a
|
||||
heap is a good structure for implementing schedulers (this is what I
|
||||
used for my MIDI sequencer :-).
|
||||
|
||||
Various structures for implementing schedulers have been extensively
|
||||
studied, and heaps are good for this, as they are reasonably speedy,
|
||||
the speed is almost constant, and the worst case is not much different
|
||||
than the average case. However, there are other representations which
|
||||
are more efficient overall, yet the worst cases might be terrible.
|
||||
|
||||
Heaps are also very useful in big disk sorts. You most probably all
|
||||
know that a big sort implies producing "runs" (which are pre-sorted
|
||||
sequences, which size is usually related to the amount of CPU memory),
|
||||
followed by a merging passes for these runs, which merging is often
|
||||
very cleverly organised[1]. It is very important that the initial
|
||||
sort produces the longest runs possible. Tournaments are a good way
|
||||
to that. If, using all the memory available to hold a tournament, you
|
||||
replace and percolate items that happen to fit the current run, you'll
|
||||
produce runs which are twice the size of the memory for random input,
|
||||
and much better for input fuzzily ordered.
|
||||
|
||||
Moreover, if you output the 0'th item on disk and get an input which
|
||||
may not fit in the current tournament (because the value "wins" over
|
||||
the last output value), it cannot fit in the heap, so the size of the
|
||||
heap decreases. The freed memory could be cleverly reused immediately
|
||||
for progressively building a second heap, which grows at exactly the
|
||||
same rate the first heap is melting. When the first heap completely
|
||||
vanishes, you switch heaps and start a new run. Clever and quite
|
||||
effective!
|
||||
|
||||
In a word, heaps are useful memory structures to know. I use them in
|
||||
a few applications, and I think it is good to keep a `heap' module
|
||||
around. :-)
|
||||
|
||||
--------------------
|
||||
[1] The disk balancing algorithms which are current, nowadays, are
|
||||
more annoying than clever, and this is a consequence of the seeking
|
||||
capabilities of the disks. On devices which cannot seek, like big
|
||||
tape drives, the story was quite different, and one had to be very
|
||||
clever to ensure (far in advance) that each tape movement will be the
|
||||
most effective possible (that is, will best participate at
|
||||
"progressing" the merge). Some tapes were even able to read
|
||||
backwards, and this was also used to avoid the rewinding time.
|
||||
Believe me, real good tape sorts were quite spectacular to watch!
|
||||
From all times, sorting has always been a Great Art! :-)
|
||||
"""
|
||||
|
||||
__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge',
|
||||
'nlargest', 'nsmallest', 'heappushpop']
|
||||
|
||||
#from itertools import count, tee, chain
|
||||
|
||||
def heappush(heap, item):
    """Add *item* to *heap*, keeping the heap invariant intact."""
    # Append at the end (the first free leaf) and float it up into place.
    heap.append(item)
    _siftdown(heap, 0, len(heap) - 1)
|
||||
|
||||
def heappop(heap):
    """Remove and return the smallest item from *heap*."""
    # Pop the last element first; IndexError propagates on an empty heap.
    tail = heap.pop()
    if not heap:
        return tail
    smallest = heap[0]
    heap[0] = tail
    _siftup(heap, 0)
    return smallest
|
||||
|
||||
def heapreplace(heap, item):
    """Pop and return the smallest value, then add *item* -- in one step.

    More efficient than heappop() followed by heappush(); the heap size
    never changes.  Note the returned value may be larger than *item*, so
    guard with ``if item > heap[0]: item = heapreplace(heap, item)`` when
    that matters.
    """
    smallest = heap[0]  # IndexError on an empty heap, as documented
    heap[0] = item
    _siftup(heap, 0)
    return smallest
|
||||
|
||||
def heappushpop(heap, item):
    """Fast version of a heappush followed by a heappop."""
    # Only touch the heap when the new item is not already the smallest.
    if heap and heap[0] < item:
        heap[0], item = item, heap[0]
        _siftup(heap, 0)
    return item
|
||||
|
||||
def heapify(x):
    """Rearrange list *x* into heap order, in place, in O(len(x)) time."""
    # Sift down every internal node, deepest first.  Leaves (indices
    # >= len(x)//2) are trivially one-element heaps, so they are skipped.
    for node in range(len(x) // 2 - 1, -1, -1):
        _siftup(x, node)
|
||||
|
||||
def _heappushpop_max(heap, item):
    """Maxheap counterpart of heappushpop()."""
    # Only touch the heap when the new item is not already the largest.
    if heap and item < heap[0]:
        heap[0], item = item, heap[0]
        _siftup_max(heap, 0)
    return item
|
||||
|
||||
def _heapify_max(x):
    """Rearrange list *x* into maxheap order, in place, in O(len(x)) time."""
    for node in range(len(x) // 2 - 1, -1, -1):
        _siftup_max(x, node)
|
||||
|
||||
def nlargest(n, iterable):
    """Find the n largest elements in a dataset.

    Equivalent to: sorted(iterable, reverse=True)[:n]
    """
    from itertools import islice, count, tee, chain
    if n < 0:
        return []
    source = iter(iterable)
    # Seed a min-heap with the first n items; everything surviving in it
    # at the end is one of the n largest seen.
    keep = list(islice(source, n))
    if not keep:
        return keep
    heapify(keep)
    push_pop = heappushpop
    for candidate in source:
        push_pop(keep, candidate)
    keep.sort(reverse=True)
    return keep
|
||||
|
||||
def nsmallest(n, iterable):
    """Find the n smallest elements in a dataset.

    Equivalent to: sorted(iterable)[:n]
    """
    from itertools import islice, count, tee, chain
    if n < 0:
        return []
    source = iter(iterable)
    # Seed a max-heap with the first n items; everything surviving in it
    # at the end is one of the n smallest seen.
    keep = list(islice(source, n))
    if not keep:
        return keep
    _heapify_max(keep)
    push_pop = _heappushpop_max
    for candidate in source:
        push_pop(keep, candidate)
    keep.sort()
    return keep
|
||||
|
||||
# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos
|
||||
# is the index of a leaf with a possibly out-of-order value. Restore the
|
||||
# heap invariant.
|
||||
def _siftdown(heap, startpos, pos):
|
||||
newitem = heap[pos]
|
||||
# Follow the path to the root, moving parents down until finding a place
|
||||
# newitem fits.
|
||||
while pos > startpos:
|
||||
parentpos = (pos - 1) >> 1
|
||||
parent = heap[parentpos]
|
||||
if newitem < parent:
|
||||
heap[pos] = parent
|
||||
pos = parentpos
|
||||
continue
|
||||
break
|
||||
heap[pos] = newitem
|
||||
|
||||
# The child indices of heap index pos are already heaps, and we want to make
|
||||
# a heap at index pos too. We do this by bubbling the smaller child of
|
||||
# pos up (and so on with that child's children, etc) until hitting a leaf,
|
||||
# then using _siftdown to move the oddball originally at index pos into place.
|
||||
#
|
||||
# We *could* break out of the loop as soon as we find a pos where newitem <=
|
||||
# both its children, but turns out that's not a good idea, and despite that
|
||||
# many books write the algorithm that way. During a heap pop, the last array
|
||||
# element is sifted in, and that tends to be large, so that comparing it
|
||||
# against values starting from the root usually doesn't pay (= usually doesn't
|
||||
# get us out of the loop early). See Knuth, Volume 3, where this is
|
||||
# explained and quantified in an exercise.
|
||||
#
|
||||
# Cutting the # of comparisons is important, since these routines have no
|
||||
# way to extract "the priority" from an array element, so that intelligence
|
||||
# is likely to be hiding in custom comparison methods, or in array elements
|
||||
# storing (priority, record) tuples. Comparisons are thus potentially
|
||||
# expensive.
|
||||
#
|
||||
# On random arrays of length 1000, making this change cut the number of
|
||||
# comparisons made by heapify() a little, and those made by exhaustive
|
||||
# heappop() a lot, in accord with theory. Here are typical results from 3
|
||||
# runs (3 just to demonstrate how small the variance is):
|
||||
#
|
||||
# Compares needed by heapify Compares needed by 1000 heappops
|
||||
# -------------------------- --------------------------------
|
||||
# 1837 cut to 1663 14996 cut to 8680
|
||||
# 1855 cut to 1659 14966 cut to 8678
|
||||
# 1847 cut to 1660 15024 cut to 8703
|
||||
#
|
||||
# Building the heap by using heappush() 1000 times instead required
|
||||
# 2198, 2148, and 2219 compares: heapify() is more efficient, when
|
||||
# you can use it.
|
||||
#
|
||||
# The total compares needed by list.sort() on the same lists were 8627,
|
||||
# 8627, and 8632 (this should be compared to the sum of heapify() and
|
||||
# heappop() compares): list.sort() is (unsurprisingly!) more efficient
|
||||
# for sorting.
|
||||
|
||||
def _siftup(heap, pos):
    """Sink the item at *pos* down to its proper place.

    Both subtrees of pos are assumed to already be valid heaps.  The
    smaller child is repeatedly promoted until a leaf is reached, then
    _siftdown() floats the displaced item back up -- fewer comparisons
    than the textbook early-exit formulation (see Knuth, Vol. 3).
    """
    end = len(heap)
    start = pos
    item = heap[pos]
    child = 2 * pos + 1  # leftmost child position
    while child < end:
        # Pick the smaller of the two children.
        right = child + 1
        if right < end and not heap[child] < heap[right]:
            child = right
        # Promote it and descend.
        heap[pos] = heap[child]
        pos = child
        child = 2 * pos + 1
    # pos is now a leaf; drop the item there and bubble it up into place.
    heap[pos] = item
    _siftdown(heap, start, pos)
|
||||
|
||||
def _siftdown_max(heap, startpos, pos):
|
||||
'Maxheap variant of _siftdown'
|
||||
newitem = heap[pos]
|
||||
# Follow the path to the root, moving parents down until finding a place
|
||||
# newitem fits.
|
||||
while pos > startpos:
|
||||
parentpos = (pos - 1) >> 1
|
||||
parent = heap[parentpos]
|
||||
if parent < newitem:
|
||||
heap[pos] = parent
|
||||
pos = parentpos
|
||||
continue
|
||||
break
|
||||
heap[pos] = newitem
|
||||
|
||||
def _siftup_max(heap, pos):
    """Maxheap variant of _siftup()."""
    end = len(heap)
    start = pos
    item = heap[pos]
    child = 2 * pos + 1  # leftmost child position
    while child < end:
        # Pick the larger of the two children.
        right = child + 1
        if right < end and not heap[right] < heap[child]:
            child = right
        # Promote it and descend.
        heap[pos] = heap[child]
        pos = child
        child = 2 * pos + 1
    # pos is now a leaf; drop the item there and bubble it up into place.
    heap[pos] = item
    _siftdown_max(heap, start, pos)
|
||||
|
||||
# If available, use C implementation
|
||||
try:
|
||||
from _heapq import *
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
def merge(*iterables):
    '''Merge multiple sorted inputs into a single sorted output.

    Similar to sorted(itertools.chain(*iterables)) but returns a generator,
    does not pull the data into memory all at once, and assumes that each of
    the input streams is already sorted (smallest to largest).

    >>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]))
    [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]

    '''
    # Bind frequently used globals/builtins to locals for speed in the loop.
    _heappop, _heapreplace, _StopIteration = heappop, heapreplace, StopIteration
    _len = len

    # Heap entries are [current_value, iterator_number, next_callable];
    # itnum breaks ties so unorderable payloads never get compared.
    h = []
    h_append = h.append
    for itnum, it in enumerate(map(iter, iterables)):
        try:
            next = it.__next__
            h_append([next(), itnum, next])
        except _StopIteration:
            # Empty input stream: simply omit it from the heap.
            pass
    heapify(h)

    while _len(h) > 1:
        try:
            while True:
                # Yield the overall smallest, then advance that stream and
                # restore the heap with its next value.
                v, itnum, next = s = h[0]
                yield v
                s[0] = next()               # raises StopIteration when exhausted
                _heapreplace(h, s)          # restore heap condition
        except _StopIteration:
            _heappop(h)                     # remove empty iterator
    if h:
        # fast case when only a single iterator remains
        v, itnum, next = h[0]
        yield v
        yield from next.__self__
|
||||
|
||||
# Extend the implementations of nsmallest and nlargest to use a key= argument
|
||||
_nsmallest = nsmallest
def nsmallest(n, iterable, key=None):
    """Find the n smallest elements in a dataset.

    Equivalent to: sorted(iterable, key=key)[:n]
    """
    from itertools import islice, count, tee, chain
    # n == 1: a single min() pass beats building a heap.
    if n == 1:
        it = iter(iterable)
        head = list(islice(it, 1))
        if not head:
            return []
        whole = chain(head, it)
        return [min(whole)] if key is None else [min(whole, key=key)]

    # When the input size is known and n covers it, plain sorted() wins.
    try:
        size = len(iterable)
    except (TypeError, AttributeError):
        pass
    else:
        if n >= size:
            return sorted(iterable, key=key)[:n]

    if key is None:
        # Decorate with a serial number only (keeps ties resolved stably).
        decorated = _nsmallest(n, zip(iterable, count()))
        return [pair[0] for pair in decorated]

    # General case, slowest: (key, serial, value) triples.
    keyed, plain = tee(iterable)
    decorated = _nsmallest(n, zip(map(key, keyed), count(), plain))
    return [triple[2] for triple in decorated]
|
||||
|
||||
# Keep a reference to the undecorated implementation (possibly the C one
# imported above); the wrapper below adds key= support on top of it.
_nlargest = nlargest
def nlargest(n, iterable, key=None):
    """Find the n largest elements in a dataset.

    Equivalent to:  sorted(iterable, key=key, reverse=True)[:n]
    """

    from itertools import islice, count, tee, chain
    # Short-cut for n==1 is to use max() when len(iterable)>0
    if n == 1:
        it = iter(iterable)
        head = list(islice(it, 1))
        if not head:
            # Empty input: nothing to return.
            return []
        if key is None:
            return [max(chain(head, it))]
        return [max(chain(head, it), key=key)]

    # When n>=size, it's faster to use sorted()
    try:
        size = len(iterable)
    except (TypeError, AttributeError):
        # Unsized iterable (e.g. a generator): fall through to the heap path.
        pass
    else:
        if n >= size:
            return sorted(iterable, key=key, reverse=True)[:n]

    # When key is none, use simpler decoration.
    # count(0,-1) supplies a unique, *decreasing* tiebreaker so that among
    # equal values the earlier element wins (stability for nlargest), and
    # payload values are never compared against each other directly.
    if key is None:
        it = zip(iterable, count(0,-1))                     # decorate
        result = _nlargest(n, it)
        return [r[0] for r in result]                       # undecorate

    # General case, slowest method: decorate with the key plus a positional
    # tiebreaker; tee() lets us compute keys without consuming the payload.
    in1, in2 = tee(iterable)
    it = zip(map(key, in1), count(0,-1), in2)               # decorate
    result = _nlargest(n, it)
    return [r[2] for r in result]                           # undecorate
|
||||
|
||||
if __name__ == "__main__":
    # Simple sanity test: push everything, then pop repeatedly.  heappop
    # always returns the smallest remaining item, so the output list must
    # come out in sorted order.
    heap = []
    data = [1, 3, 5, 7, 9, 2, 4, 6, 8, 0]
    for item in data:
        heappush(heap, item)
    sort = []
    while heap:
        sort.append(heappop(heap))
    print(sort)

    # Also run the doctests embedded in this module's docstrings.
    import doctest
    doctest.testmod()
|
|
@ -0,0 +1,149 @@
|
|||
"""HMAC (Keyed-Hashing for Message Authentication) Python module.
|
||||
|
||||
Implements the HMAC algorithm as described by RFC 2104.
|
||||
"""
|
||||
|
||||
import warnings as _warnings
|
||||
#from _operator import _compare_digest as compare_digest
|
||||
import hashlib as _hashlib
|
||||
PendingDeprecationWarning = None
|
||||
RuntimeWarning = None
|
||||
|
||||
trans_5C = bytes((x ^ 0x5C) for x in range(256))
|
||||
trans_36 = bytes((x ^ 0x36) for x in range(256))
|
||||
|
||||
def translate(d, t):
    """Map every byte of *d* through the 256-entry lookup table *t*.

    Used to XOR an HMAC key with the ipad/opad constants via the
    precomputed trans_36/trans_5C tables.
    """
    out = bytearray()
    for b in d:
        out.append(t[b])
    return bytes(out)
|
||||
|
||||
# The size of the digests returned by HMAC depends on the underlying
|
||||
# hashing module used. Use digest_size from the instance of HMAC instead.
|
||||
digest_size = None
|
||||
|
||||
|
||||
|
||||
class HMAC:
    """RFC 2104 HMAC class.  Also complies with RFC 4231.

    This supports the API for Cryptographic Hash Functions (PEP 247).
    """
    blocksize = 64  # 512-bit HMAC; can be changed in subclasses.

    def __init__(self, key, msg = None, digestmod = None):
        """Create a new HMAC object.

        key:       key for the keyed hash object.
        msg:       Initial input for the hash, if provided.
        digestmod: A module supporting PEP 247.  *OR*
                   A hashlib constructor returning a new hash object. *OR*
                   A hash name suitable for hashlib.new().
                   Defaults to hashlib.md5.
                   Implicit default to hashlib.md5 is deprecated and will be
                   removed in Python 3.6.

        Note: key and msg must be a bytes or bytearray objects.
        """

        if not isinstance(key, (bytes, bytearray)):
            raise TypeError("key: expected bytes or bytearray, but got %r" % type(key).__name__)

        if digestmod is None:
            _warnings.warn("HMAC() without an explicit digestmod argument "
                           "is deprecated.", PendingDeprecationWarning, 2)
            digestmod = _hashlib.md5

        # Normalize digestmod into a zero/one-argument hash constructor.
        if callable(digestmod):
            self.digest_cons = digestmod
        elif isinstance(digestmod, str):
            self.digest_cons = lambda d=b'': _hashlib.new(digestmod, d)
        else:
            self.digest_cons = lambda d=b'': digestmod.new(d)

        # outer/inner hold the H(key ^ opad) and H(key ^ ipad) running
        # states of the RFC 2104 construction.
        self.outer = self.digest_cons()
        self.inner = self.digest_cons()
        self.digest_size = self.inner.digest_size

        if hasattr(self.inner, 'block_size'):
            blocksize = self.inner.block_size
            if blocksize < 16:
                _warnings.warn('block_size of %d seems too small; using our '
                               'default of %d.' % (blocksize, self.blocksize),
                               RuntimeWarning, 2)
                blocksize = self.blocksize
        else:
            _warnings.warn('No block_size attribute on given digest object; '
                           'Assuming %d.' % (self.blocksize),
                           RuntimeWarning, 2)
            blocksize = self.blocksize

        # self.blocksize is the default blocksize. self.block_size is
        # effective block size as well as the public API attribute.
        self.block_size = blocksize

        # Keys longer than one block are first hashed down, per RFC 2104.
        if len(key) > blocksize:
            key = self.digest_cons(key).digest()

        # Zero-pad the key to exactly one block, then feed the opad (0x5C)
        # and ipad (0x36) XORed keys into the two hash states via the
        # module-level translation tables.
        key = key + bytes(blocksize - len(key))
        self.outer.update(translate(key, trans_5C))
        self.inner.update(translate(key, trans_36))
        if msg is not None:
            self.update(msg)

    @property
    def name(self):
        # e.g. "hmac-sha256" for a sha256 inner digest.
        return "hmac-" + self.inner.name

    def update(self, msg):
        """Update this hashing object with the string msg.
        """
        # Only the inner hash absorbs message data; outer is finalized in
        # _current().
        self.inner.update(msg)

    def copy(self):
        """Return a separate copy of this hashing object.

        An update to this copy won't affect the original object.
        """
        # Call __new__ directly to avoid the expensive __init__.
        other = self.__class__.__new__(self.__class__)
        other.digest_cons = self.digest_cons
        other.digest_size = self.digest_size
        other.inner = self.inner.copy()
        other.outer = self.outer.copy()
        return other

    def _current(self):
        """Return a hash object for the current state.

        To be used only internally with digest() and hexdigest().
        """
        # Work on a copy of outer so this object stays updatable afterwards.
        h = self.outer.copy()
        h.update(self.inner.digest())
        return h

    def digest(self):
        """Return the hash value of this hashing object.

        This returns a string containing 8-bit data.  The object is
        not altered in any way by this function; you can continue
        updating the object after calling this function.
        """
        h = self._current()
        return h.digest()

    def hexdigest(self):
        """Like digest(), but returns a string of hexadecimal digits instead.
        """
        h = self._current()
        return h.hexdigest()
|
||||
|
||||
def new(key, msg = None, digestmod = None):
    """Create and return a new HMAC hashing object.

    key: The starting key for the hash (bytes or bytearray).
    msg: if available, will immediately be hashed into the object's starting
         state.
    digestmod: digest constructor, PEP 247 module, or hash name (see HMAC).

    Feed arbitrary data into the returned object with its update() method,
    and read the MAC at any time via digest() or hexdigest().
    """
    return HMAC(key, msg=msg, digestmod=digestmod)
|
|
@ -0,0 +1,22 @@
|
|||
"""
|
||||
General functions for HTML manipulation.
|
||||
"""
|
||||
|
||||
|
||||
_escape_map = {ord('&'): '&', ord('<'): '<', ord('>'): '>'}
|
||||
_escape_map_full = {ord('&'): '&', ord('<'): '<', ord('>'): '>',
|
||||
ord('"'): '"', ord('\''): '''}
|
||||
|
||||
# NB: this is a candidate for a bytes/string polymorphic interface
|
||||
|
||||
def escape(s, quote=True):
    """
    Replace special characters "&", "<" and ">" to HTML-safe sequences.
    If the optional flag quote is true (the default), the quotation mark
    characters, both double quote (") and single quote (') characters are also
    translated.
    """
    # Bug fix: the previous implementation called string.translate(), which
    # does not exist in the Python 3 `string` module (str.translate replaced
    # it).  Use the same replace chain as CPython's html.escape instead.
    # '&' must be replaced first so the entities introduced by the later
    # replacements are not themselves escaped.
    s = s.replace("&", "&amp;")
    s = s.replace("<", "&lt;")
    s = s.replace(">", "&gt;")
    if quote:
        s = s.replace('"', "&quot;")
        s = s.replace('\'', "&#x27;")
    return s
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,532 @@
|
|||
"""A parser for HTML and XHTML."""
|
||||
|
||||
# This file is based on sgmllib.py, but the API is slightly different.
|
||||
|
||||
# XXX There should be a way to distinguish between PCDATA (parsed
|
||||
# character data -- the normal case), RCDATA (replaceable character
|
||||
# data -- only char and entity references and end tags are special)
|
||||
# and CDATA (character data -- only end tags are special).
|
||||
|
||||
|
||||
import _markupbase
|
||||
import re
|
||||
import warnings
|
||||
|
||||
# Regular expressions used for parsing
|
||||
|
||||
interesting_normal = re.compile('[&<]')
|
||||
incomplete = re.compile('&[a-zA-Z#]')
|
||||
|
||||
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
|
||||
charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
|
||||
|
||||
starttagopen = re.compile('<[a-zA-Z]')
|
||||
piclose = re.compile('>')
|
||||
commentclose = re.compile(r'--\s*>')
|
||||
tagfind = re.compile('([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*')
|
||||
# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
|
||||
# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
|
||||
tagfind_tolerant = re.compile('[a-zA-Z][^\t\n\r\f />\\x00]*')
|
||||
# Note:
|
||||
# 1) the strict attrfind isn't really strict, but we can't make it
|
||||
# correctly strict without breaking backward compatibility;
|
||||
# 2) if you change attrfind remember to update locatestarttagend too;
|
||||
# 3) if you change attrfind and/or locatestarttagend the parser will
|
||||
# explode, so don't do it.
|
||||
attrfind = re.compile(
|
||||
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
|
||||
r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
|
||||
attrfind_tolerant = re.compile(
|
||||
r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'
|
||||
r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*')
|
||||
locatestarttagend = re.compile(r"""
|
||||
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
|
||||
(?:\s+ # whitespace before attribute name
|
||||
(?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
|
||||
(?:\s*=\s* # value indicator
|
||||
(?:'[^']*' # LITA-enclosed value
|
||||
|\"[^\"]*\" # LIT-enclosed value
|
||||
|[^'\">\s]+ # bare value
|
||||
)
|
||||
)?
|
||||
)
|
||||
)*
|
||||
\s* # trailing whitespace
|
||||
""", re.VERBOSE)
|
||||
locatestarttagend_tolerant = re.compile(r"""
|
||||
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
|
||||
(?:[\s/]* # optional whitespace before attribute name
|
||||
(?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name
|
||||
(?:\s*=+\s* # value indicator
|
||||
(?:'[^']*' # LITA-enclosed value
|
||||
|"[^"]*" # LIT-enclosed value
|
||||
|(?!['"])[^>\s]* # bare value
|
||||
)
|
||||
(?:\s*,)* # possibly followed by a comma
|
||||
)?(?:\s|/(?!>))*
|
||||
)*
|
||||
)?
|
||||
\s* # trailing whitespace
|
||||
""", re.VERBOSE)
|
||||
endendtag = re.compile('>')
|
||||
# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between
|
||||
# </ and the tag name, so maybe this should be fixed
|
||||
endtagfind = re.compile('</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
|
||||
|
||||
|
||||
class HTMLParseError(Exception):
    """Exception raised for all parse errors.

    Carries the message plus an optional (lineno, offset) source position;
    either component may be None when unknown.
    """

    def __init__(self, msg, position=(None, None)):
        assert msg
        self.msg = msg
        self.lineno = position[0]
        self.offset = position[1]

    def __str__(self):
        # Build "msg, at line L, column C", appending only the location
        # pieces that are actually known.  Column is reported 1-based.
        parts = [self.msg]
        if self.lineno is not None:
            parts.append(", at line %d" % self.lineno)
        if self.offset is not None:
            parts.append(", column %d" % (self.offset + 1))
        return "".join(parts)
|
||||
|
||||
|
||||
class HTMLParser(_markupbase.ParserBase):
|
||||
"""Find tags and other markup and call handler functions.
|
||||
|
||||
Usage:
|
||||
p = HTMLParser()
|
||||
p.feed(data)
|
||||
...
|
||||
p.close()
|
||||
|
||||
Start tags are handled by calling self.handle_starttag() or
|
||||
self.handle_startendtag(); end tags by self.handle_endtag(). The
|
||||
data between tags is passed from the parser to the derived class
|
||||
by calling self.handle_data() with the data as argument (the data
|
||||
may be split up in arbitrary chunks). Entity references are
|
||||
passed by calling self.handle_entityref() with the entity
|
||||
reference as the argument. Numeric character references are
|
||||
passed to self.handle_charref() with the string containing the
|
||||
reference as the argument.
|
||||
"""
|
||||
|
||||
CDATA_CONTENT_ELEMENTS = ("script", "style")
|
||||
|
||||
def __init__(self, strict=False):
|
||||
"""Initialize and reset this instance.
|
||||
|
||||
If strict is set to False (the default) the parser will parse invalid
|
||||
markup, otherwise it will raise an error. Note that the strict mode
|
||||
is deprecated.
|
||||
"""
|
||||
if strict:
|
||||
warnings.warn("The strict mode is deprecated.",
|
||||
DeprecationWarning, stacklevel=2)
|
||||
self.strict = strict
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
"""Reset this instance. Loses all unprocessed data."""
|
||||
self.rawdata = ''
|
||||
self.lasttag = '???'
|
||||
self.interesting = interesting_normal
|
||||
self.cdata_elem = None
|
||||
_markupbase.ParserBase.reset(self)
|
||||
|
||||
def feed(self, data):
|
||||
r"""Feed data to the parser.
|
||||
|
||||
Call this as often as you want, with as little or as much text
|
||||
as you want (may include '\n').
|
||||
"""
|
||||
self.rawdata = self.rawdata + data
|
||||
self.goahead(0)
|
||||
|
||||
def close(self):
|
||||
"""Handle any buffered data."""
|
||||
self.goahead(1)
|
||||
|
||||
def error(self, message):
|
||||
raise HTMLParseError(message, self.getpos())
|
||||
|
||||
__starttag_text = None
|
||||
|
||||
def get_starttag_text(self):
|
||||
"""Return full source of start tag: '<...>'."""
|
||||
return self.__starttag_text
|
||||
|
||||
def set_cdata_mode(self, elem):
|
||||
self.cdata_elem = elem.lower()
|
||||
self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
|
||||
|
||||
def clear_cdata_mode(self):
|
||||
self.interesting = interesting_normal
|
||||
self.cdata_elem = None
|
||||
|
||||
# Internal -- handle data as far as reasonable. May leave state
|
||||
# and data to be processed by a subsequent call. If 'end' is
|
||||
# true, force handling all data as if followed by EOF marker.
|
||||
def goahead(self, end):
|
||||
rawdata = self.rawdata
|
||||
i = 0
|
||||
n = len(rawdata)
|
||||
while i < n:
|
||||
match = self.interesting.search(rawdata, i) # < or &
|
||||
if match:
|
||||
j = match.start()
|
||||
else:
|
||||
if self.cdata_elem:
|
||||
break
|
||||
j = n
|
||||
if i < j: self.handle_data(rawdata[i:j])
|
||||
i = self.updatepos(i, j)
|
||||
if i == n: break
|
||||
startswith = rawdata.startswith
|
||||
if startswith('<', i):
|
||||
if starttagopen.match(rawdata, i): # < + letter
|
||||
k = self.parse_starttag(i)
|
||||
elif startswith("</", i):
|
||||
k = self.parse_endtag(i)
|
||||
elif startswith("<!--", i):
|
||||
k = self.parse_comment(i)
|
||||
elif startswith("<?", i):
|
||||
k = self.parse_pi(i)
|
||||
elif startswith("<!", i):
|
||||
if self.strict:
|
||||
k = self.parse_declaration(i)
|
||||
else:
|
||||
k = self.parse_html_declaration(i)
|
||||
elif (i + 1) < n:
|
||||
self.handle_data("<")
|
||||
k = i + 1
|
||||
else:
|
||||
break
|
||||
if k < 0:
|
||||
if not end:
|
||||
break
|
||||
if self.strict:
|
||||
self.error("EOF in middle of construct")
|
||||
k = rawdata.find('>', i + 1)
|
||||
if k < 0:
|
||||
k = rawdata.find('<', i + 1)
|
||||
if k < 0:
|
||||
k = i + 1
|
||||
else:
|
||||
k += 1
|
||||
self.handle_data(rawdata[i:k])
|
||||
i = self.updatepos(i, k)
|
||||
elif startswith("&#", i):
|
||||
match = charref.match(rawdata, i)
|
||||
if match:
|
||||
name = match.group()[2:-1]
|
||||
self.handle_charref(name)
|
||||
k = match.end()
|
||||
if not startswith(';', k-1):
|
||||
k = k - 1
|
||||
i = self.updatepos(i, k)
|
||||
continue
|
||||
else:
|
||||
if ";" in rawdata[i:]: #bail by consuming &#
|
||||
self.handle_data(rawdata[0:2])
|
||||
i = self.updatepos(i, 2)
|
||||
break
|
||||
elif startswith('&', i):
|
||||
match = entityref.match(rawdata, i)
|
||||
if match:
|
||||
name = match.group(1)
|
||||
self.handle_entityref(name)
|
||||
k = match.end()
|
||||
if not startswith(';', k-1):
|
||||
k = k - 1
|
||||
i = self.updatepos(i, k)
|
||||
continue
|
||||
match = incomplete.match(rawdata, i)
|
||||
if match:
|
||||
# match.group() will contain at least 2 chars
|
||||
if end and match.group() == rawdata[i:]:
|
||||
if self.strict:
|
||||
self.error("EOF in middle of entity or char ref")
|
||||
else:
|
||||
k = match.end()
|
||||
if k <= i:
|
||||
k = n
|
||||
i = self.updatepos(i, i + 1)
|
||||
# incomplete
|
||||
break
|
||||
elif (i + 1) < n:
|
||||
# not the end of the buffer, and can't be confused
|
||||
# with some other construct
|
||||
self.handle_data("&")
|
||||
i = self.updatepos(i, i + 1)
|
||||
else:
|
||||
break
|
||||
else:
|
||||
assert 0, "interesting.search() lied"
|
||||
# end while
|
||||
if end and i < n and not self.cdata_elem:
|
||||
self.handle_data(rawdata[i:n])
|
||||
i = self.updatepos(i, n)
|
||||
self.rawdata = rawdata[i:]
|
||||
|
||||
# Internal -- parse html declarations, return length or -1 if not terminated
|
||||
# See w3.org/TR/html5/tokenization.html#markup-declaration-open-state
|
||||
# See also parse_declaration in _markupbase
|
||||
def parse_html_declaration(self, i):
|
||||
rawdata = self.rawdata
|
||||
assert rawdata[i:i+2] == '<!', ('unexpected call to '
|
||||
'parse_html_declaration()')
|
||||
if rawdata[i:i+4] == '<!--':
|
||||
# this case is actually already handled in goahead()
|
||||
return self.parse_comment(i)
|
||||
elif rawdata[i:i+3] == '<![':
|
||||
return self.parse_marked_section(i)
|
||||
elif rawdata[i:i+9].lower() == '<!doctype':
|
||||
# find the closing >
|
||||
gtpos = rawdata.find('>', i+9)
|
||||
if gtpos == -1:
|
||||
return -1
|
||||
self.handle_decl(rawdata[i+2:gtpos])
|
||||
return gtpos+1
|
||||
else:
|
||||
return self.parse_bogus_comment(i)
|
||||
|
||||
# Internal -- parse bogus comment, return length or -1 if not terminated
|
||||
# see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
|
||||
def parse_bogus_comment(self, i, report=1):
|
||||
rawdata = self.rawdata
|
||||
assert rawdata[i:i+2] in ('<!', '</'), ('unexpected call to '
|
||||
'parse_comment()')
|
||||
pos = rawdata.find('>', i+2)
|
||||
if pos == -1:
|
||||
return -1
|
||||
if report:
|
||||
self.handle_comment(rawdata[i+2:pos])
|
||||
return pos + 1
|
||||
|
||||
# Internal -- parse processing instr, return end or -1 if not terminated
|
||||
def parse_pi(self, i):
|
||||
rawdata = self.rawdata
|
||||
assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()'
|
||||
match = piclose.search(rawdata, i+2) # >
|
||||
if not match:
|
||||
return -1
|
||||
j = match.start()
|
||||
self.handle_pi(rawdata[i+2: j])
|
||||
j = match.end()
|
||||
return j
|
||||
|
||||
# Internal -- handle starttag, return end or -1 if not terminated
|
||||
def parse_starttag(self, i):
|
||||
self.__starttag_text = None
|
||||
endpos = self.check_for_whole_start_tag(i)
|
||||
if endpos < 0:
|
||||
return endpos
|
||||
rawdata = self.rawdata
|
||||
self.__starttag_text = rawdata[i:endpos]
|
||||
|
||||
# Now parse the data between i+1 and j into a tag and attrs
|
||||
attrs = []
|
||||
match = tagfind.match(rawdata, i+1)
|
||||
assert match, 'unexpected call to parse_starttag()'
|
||||
k = match.end()
|
||||
self.lasttag = tag = match.group(1).lower()
|
||||
while k < endpos:
|
||||
if self.strict:
|
||||
m = attrfind.match(rawdata, k)
|
||||
else:
|
||||
m = attrfind_tolerant.match(rawdata, k)
|
||||
if not m:
|
||||
break
|
||||
attrname, rest, attrvalue = m.group(1, 2, 3)
|
||||
if not rest:
|
||||
attrvalue = None
|
||||
elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
|
||||
attrvalue[:1] == '"' == attrvalue[-1:]:
|
||||
attrvalue = attrvalue[1:-1]
|
||||
if attrvalue:
|
||||
attrvalue = self.unescape(attrvalue)
|
||||
attrs.append((attrname.lower(), attrvalue))
|
||||
k = m.end()
|
||||
|
||||
end = rawdata[k:endpos].strip()
|
||||
if end not in (">", "/>"):
|
||||
lineno, offset = self.getpos()
|
||||
if "\n" in self.__starttag_text:
|
||||
lineno = lineno + self.__starttag_text.count("\n")
|
||||
offset = len(self.__starttag_text) \
|
||||
- self.__starttag_text.rfind("\n")
|
||||
else:
|
||||
offset = offset + len(self.__starttag_text)
|
||||
if self.strict:
|
||||
self.error("junk characters in start tag: %r"
|
||||
% (rawdata[k:endpos][:20],))
|
||||
self.handle_data(rawdata[i:endpos])
|
||||
return endpos
|
||||
if end.endswith('/>'):
|
||||
# XHTML-style empty tag: <span attr="value" />
|
||||
self.handle_startendtag(tag, attrs)
|
||||
else:
|
||||
self.handle_starttag(tag, attrs)
|
||||
if tag in self.CDATA_CONTENT_ELEMENTS:
|
||||
self.set_cdata_mode(tag)
|
||||
return endpos
|
||||
|
||||
# Internal -- check to see if we have a complete starttag; return end
|
||||
# or -1 if incomplete.
|
||||
def check_for_whole_start_tag(self, i):
|
||||
rawdata = self.rawdata
|
||||
if self.strict:
|
||||
m = locatestarttagend.match(rawdata, i)
|
||||
else:
|
||||
m = locatestarttagend_tolerant.match(rawdata, i)
|
||||
if m:
|
||||
j = m.end()
|
||||
next = rawdata[j:j+1]
|
||||
if next == ">":
|
||||
return j + 1
|
||||
if next == "/":
|
||||
if rawdata.startswith("/>", j):
|
||||
return j + 2
|
||||
if rawdata.startswith("/", j):
|
||||
# buffer boundary
|
||||
return -1
|
||||
# else bogus input
|
||||
if self.strict:
|
||||
self.updatepos(i, j + 1)
|
||||
self.error("malformed empty start tag")
|
||||
if j > i:
|
||||
return j
|
||||
else:
|
||||
return i + 1
|
||||
if next == "":
|
||||
# end of input
|
||||
return -1
|
||||
if next in ("abcdefghijklmnopqrstuvwxyz=/"
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
|
||||
# end of input in or before attribute value, or we have the
|
||||
# '/' from a '/>' ending
|
||||
return -1
|
||||
if self.strict:
|
||||
self.updatepos(i, j)
|
||||
self.error("malformed start tag")
|
||||
if j > i:
|
||||
return j
|
||||
else:
|
||||
return i + 1
|
||||
raise AssertionError("we should not get here!")
|
||||
|
||||
# Internal -- parse endtag, return end or -1 if incomplete
|
||||
def parse_endtag(self, i):
|
||||
rawdata = self.rawdata
|
||||
assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
|
||||
match = endendtag.search(rawdata, i+1) # >
|
||||
if not match:
|
||||
return -1
|
||||
gtpos = match.end()
|
||||
match = endtagfind.match(rawdata, i) # </ + tag + >
|
||||
if not match:
|
||||
if self.cdata_elem is not None:
|
||||
self.handle_data(rawdata[i:gtpos])
|
||||
return gtpos
|
||||
if self.strict:
|
||||
self.error("bad end tag: %r" % (rawdata[i:gtpos],))
|
||||
# find the name: w3.org/TR/html5/tokenization.html#tag-name-state
|
||||
namematch = tagfind_tolerant.match(rawdata, i+2)
|
||||
if not namematch:
|
||||
# w3.org/TR/html5/tokenization.html#end-tag-open-state
|
||||
if rawdata[i:i+3] == '</>':
|
||||
return i+3
|
||||
else:
|
||||
return self.parse_bogus_comment(i)
|
||||
tagname = namematch.group().lower()
|
||||
# consume and ignore other stuff between the name and the >
|
||||
# Note: this is not 100% correct, since we might have things like
|
||||
# </tag attr=">">, but looking for > after the name should cover
|
||||
# most of the cases and is much simpler
|
||||
gtpos = rawdata.find('>', namematch.end())
|
||||
self.handle_endtag(tagname)
|
||||
return gtpos+1
|
||||
|
||||
elem = match.group(1).lower() # script or style
|
||||
if self.cdata_elem is not None:
|
||||
if elem != self.cdata_elem:
|
||||
self.handle_data(rawdata[i:gtpos])
|
||||
return gtpos
|
||||
|
||||
self.handle_endtag(elem.lower())
|
||||
self.clear_cdata_mode()
|
||||
return gtpos
|
||||
|
||||
# Overridable -- finish processing of start+end tag: <tag.../>
|
||||
def handle_startendtag(self, tag, attrs):
|
||||
self.handle_starttag(tag, attrs)
|
||||
self.handle_endtag(tag)
|
||||
|
||||
# Overridable -- handle start tag
|
||||
def handle_starttag(self, tag, attrs):
|
||||
pass
|
||||
|
||||
# Overridable -- handle end tag
|
||||
def handle_endtag(self, tag):
|
||||
pass
|
||||
|
||||
# Overridable -- handle character reference
|
||||
def handle_charref(self, name):
|
||||
pass
|
||||
|
||||
# Overridable -- handle entity reference
|
||||
def handle_entityref(self, name):
|
||||
pass
|
||||
|
||||
# Overridable -- handle data
|
||||
def handle_data(self, data):
|
||||
pass
|
||||
|
||||
# Overridable -- handle comment
|
||||
def handle_comment(self, data):
|
||||
pass
|
||||
|
||||
# Overridable -- handle declaration
|
||||
def handle_decl(self, decl):
|
||||
pass
|
||||
|
||||
# Overridable -- handle processing instruction
|
||||
def handle_pi(self, data):
|
||||
pass
|
||||
|
||||
def unknown_decl(self, data):
|
||||
if self.strict:
|
||||
self.error("unknown declaration: %r" % (data,))
|
||||
|
||||
# Internal -- helper to remove special character quoting
|
||||
def unescape(self, s):
|
||||
if '&' not in s:
|
||||
return s
|
||||
def replaceEntities(s):
|
||||
s = s.groups()[0]
|
||||
try:
|
||||
if s[0] == "#":
|
||||
s = s[1:]
|
||||
if s[0] in ['x','X']:
|
||||
c = int(s[1:].rstrip(';'), 16)
|
||||
else:
|
||||
c = int(s.rstrip(';'))
|
||||
return chr(c)
|
||||
except ValueError:
|
||||
return '&#' + s
|
||||
else:
|
||||
from html.entities import html5
|
||||
if s in html5:
|
||||
return html5[s]
|
||||
elif s.endswith(';'):
|
||||
return '&' + s
|
||||
for x in range(2, len(s)):
|
||||
if s[:x] in html5:
|
||||
return html5[s[:x]] + s[x:]
|
||||
else:
|
||||
return '&' + s
|
||||
|
||||
return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+;|\w{1,32};?))",
|
||||
replaceEntities, s, flags=re.ASCII)
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,59 @@
|
|||
import sys
|
||||
|
||||
|
||||
def getmembers(obj, pred=None):
    """Return all (name, value) attribute pairs of obj, sorted by name.

    When pred is given, only pairs whose value satisfies pred(value) are
    included.
    """
    pairs = [(name, getattr(obj, name)) for name in dir(obj)]
    if pred is not None:
        pairs = [(name, value) for name, value in pairs if pred(value)]
    return sorted(pairs)
|
||||
|
||||
def isfunction(obj):
    # Compare against the type of a known plain function (this one).
    return isinstance(obj, type(isfunction))

def isgeneratorfunction(obj):
    # type(lambda:(yield)) is the type of a generator *function*.
    # NOTE(review): on MicroPython generator functions appear to have a
    # distinct type; on CPython this is the ordinary function type, making
    # this equivalent to isfunction() — confirm on the target runtime.
    return isinstance(obj, type(lambda:(yield)))

def isgenerator(obj):
    # Calling a generator function produces a generator object; compare
    # against its type.
    return isinstance(obj, type(lambda:(yield)()))

# Throwaway class/instance used only to obtain the bound-method type below.
class _Class:
    def meth(): pass
_Instance = _Class()

def ismethod(obj):
    # True for bound methods (same type as _Instance.meth).
    return isinstance(obj, type(_Instance.meth))

def isclass(obj):
    # All classes are instances of type (or a metaclass derived from it).
    return isinstance(obj, type)

def ismodule(obj):
    # sys is a module, so type(sys) is the module type.
    return isinstance(obj, type(sys))
|
||||
|
||||
|
||||
# The functions below are stubs: MicroPython keeps no source, frame, or
# signature metadata at runtime, so they return fixed placeholder values
# (or raise) while keeping the CPython inspect API importable.

def getargspec(func):
    # Signature introspection needs per-function metadata that MicroPython
    # does not retain.
    raise NotImplementedError("This is over-dynamic function, not supported by MicroPython")

def getmodule(obj, _filename=None):
    # Defining module is not tracked.
    return None  # Not known

def getmro(cls):
    # Full method-resolution order is unavailable; report just the class
    # itself.
    return [cls]

def getsourcefile(obj):
    # Source files are not kept on-device.
    return None  # Not known

def getfile(obj):
    return "<unknown>"

def getsource(obj):
    return "<source redacted to save you memory>"


def currentframe():
    # Frame objects are not exposed.
    return None

def getframeinfo(frame, context=1):
    # Placeholder matching the (filename, lineno, function, code_context,
    # index) tuple shape of CPython's Traceback result.
    return ("<unknown>", -1, "<unknown>", [""], 0)
|
|
@ -0,0 +1,5 @@
|
|||
# CPython-compatible `io` facade: re-export MicroPython's built-in uio and
# add the seek-whence constants that uio does not define.
from uio import *

SEEK_SET = 0  # seek relative to the start of the stream
SEEK_CUR = 1  # seek relative to the current position
SEEK_END = 2  # seek relative to the end of the stream
|
|
@ -0,0 +1,68 @@
|
|||
def count(start=0, step=1):
    """Yield start, start+step, start+2*step, ... without end."""
    value = start
    while True:
        yield value
        value += step
|
||||
|
||||
def cycle(p):
    """Yield the elements of p over and over again, forever.

    Sized inputs are re-iterated in place; unsized iterables (generators)
    are cached during their first pass and then replayed from the cache.
    """
    try:
        len(p)
    except TypeError:
        # len() is not defined for this type. Assume it is
        # a finite iterable so we must cache the elements.
        cache = []
        for i in p:
            yield i
            cache.append(i)
        p = cache
    # Replay the sized (or cached) sequence forever; an empty sequence is
    # falsy, so the generator simply terminates for empty input.
    while p:
        yield from p
|
||||
|
||||
|
||||
def repeat(el, n=None):
    """Yield el exactly n times, or forever when n is None."""
    if n is not None:
        for _ in range(n):
            yield el
        return
    while True:
        yield el
|
||||
|
||||
def chain(*iterables):
    """Yield the elements of each iterable in turn, as one flat stream."""
    for iterable in iterables:
        for item in iterable:
            yield item
|
||||
|
||||
def islice(p, start, stop=(), step=1):
    """Yield selected elements of p, like the slice p[start:stop:step].

    Supports both call forms of itertools.islice:
        islice(p, stop)            -- first `stop` elements
        islice(p, start, stop[, step])
    stop may be None to run to exhaustion (CPython-compatible; the previous
    version raised TypeError on None).  Negative indices are not supported.

    Fix: next() is wrapped so that a short input ends the generator cleanly
    instead of leaking StopIteration out of it, which is a RuntimeError
    under PEP 479 (Python 3.7+).
    """
    # Two-argument form: islice(p, stop) means start=0.
    if stop == ():
        stop = start
        start = 0
    if stop is not None and start >= stop:
        return
    it = iter(p)
    try:
        # Skip the first `start` elements.
        for _ in range(start):
            next(it)
        while True:
            yield next(it)
            # Consume and discard the step-1 elements between picks.
            for _ in range(step - 1):
                next(it)
            start += step
            if stop is not None and start >= stop:
                return
    except StopIteration:
        # Source exhausted before reaching stop: end normally (PEP 479).
        return
|
||||
|
||||
def tee(iterable, n=2):
    """Return a list of n independent iterators over iterable.

    Fix: the previous implementation returned the *same* iterator object n
    times, so advancing one "copy" silently advanced them all — not tee
    semantics.  This version shares a single source iterator and gives each
    consumer its own queue of pending items (the standard itertools.tee
    scheme), so the copies can be consumed at different rates.
    """
    it = iter(iterable)
    queues = [[] for _ in range(n)]

    def _gen(q):
        # Each consumer drains its own queue; when empty, pull one item
        # from the shared source and distribute it to every queue.
        while True:
            if not q:
                try:
                    item = next(it)
                except StopIteration:
                    return
                for other in queues:
                    other.append(item)
            yield q.pop(0)

    return [_gen(q) for q in queues]
|
||||
|
||||
def starmap(function, iterable):
    """Yield function(*args) for each argument tuple drawn from iterable."""
    for packed_args in iterable:
        result = function(*packed_args)
        yield result
|
||||
|
||||
def accumulate(iterable, func=lambda x, y: x + y):
    """Yield running reductions of iterable under func (default: addition).

    For input x1, x2, x3, ... produces x1, func(x1, x2),
    func(func(x1, x2), x3), ...  An empty iterable yields nothing.
    """
    first = True
    total = None
    for value in iterable:
        if first:
            total = value
            first = False
        else:
            total = func(total, value)
        yield total
|
|
@ -0,0 +1,332 @@
|
|||
r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of
|
||||
JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
|
||||
interchange format.
|
||||
|
||||
:mod:`json` exposes an API familiar to users of the standard library
|
||||
:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
|
||||
version of the :mod:`json` library contained in Python 2.6, but maintains
|
||||
compatibility with Python 2.4 and Python 2.5 and (currently) has
|
||||
significant performance advantages, even without using the optional C
|
||||
extension for speedups.
|
||||
|
||||
Encoding basic Python object hierarchies::
|
||||
|
||||
>>> import json
|
||||
>>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
|
||||
'["foo", {"bar": ["baz", null, 1.0, 2]}]'
|
||||
>>> print(json.dumps("\"foo\bar"))
|
||||
"\"foo\bar"
|
||||
>>> print(json.dumps('\u1234'))
|
||||
"\u1234"
|
||||
>>> print(json.dumps('\\'))
|
||||
"\\"
|
||||
>>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True))
|
||||
{"a": 0, "b": 0, "c": 0}
|
||||
>>> from io import StringIO
|
||||
>>> io = StringIO()
|
||||
>>> json.dump(['streaming API'], io)
|
||||
>>> io.getvalue()
|
||||
'["streaming API"]'
|
||||
|
||||
Compact encoding::
|
||||
|
||||
>>> import json
|
||||
>>> from collections import OrderedDict
|
||||
>>> mydict = OrderedDict([('4', 5), ('6', 7)])
|
||||
>>> json.dumps([1,2,3,mydict], separators=(',', ':'))
|
||||
'[1,2,3,{"4":5,"6":7}]'
|
||||
|
||||
Pretty printing::
|
||||
|
||||
>>> import json
|
||||
>>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True,
|
||||
... indent=4, separators=(',', ': ')))
|
||||
{
|
||||
"4": 5,
|
||||
"6": 7
|
||||
}
|
||||
|
||||
Decoding JSON::
|
||||
|
||||
>>> import json
|
||||
>>> obj = ['foo', {'bar': ['baz', None, 1.0, 2]}]
|
||||
>>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
|
||||
True
|
||||
>>> json.loads('"\\"foo\\bar"') == '"foo\x08ar'
|
||||
True
|
||||
>>> from io import StringIO
|
||||
>>> io = StringIO('["streaming API"]')
|
||||
>>> json.load(io)[0] == 'streaming API'
|
||||
True
|
||||
|
||||
Specializing JSON object decoding::
|
||||
|
||||
>>> import json
|
||||
>>> def as_complex(dct):
|
||||
... if '__complex__' in dct:
|
||||
... return complex(dct['real'], dct['imag'])
|
||||
... return dct
|
||||
...
|
||||
>>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
|
||||
... object_hook=as_complex)
|
||||
(1+2j)
|
||||
>>> from decimal import Decimal
|
||||
>>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
|
||||
True
|
||||
|
||||
Specializing JSON object encoding::
|
||||
|
||||
>>> import json
|
||||
>>> def encode_complex(obj):
|
||||
... if isinstance(obj, complex):
|
||||
... return [obj.real, obj.imag]
|
||||
    ...     raise TypeError(repr(obj) + " is not JSON serializable")
|
||||
...
|
||||
>>> json.dumps(2 + 1j, default=encode_complex)
|
||||
'[2.0, 1.0]'
|
||||
>>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
|
||||
'[2.0, 1.0]'
|
||||
>>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
|
||||
'[2.0, 1.0]'
|
||||
|
||||
|
||||
Using json.tool from the shell to validate and pretty-print::
|
||||
|
||||
$ echo '{"json":"obj"}' | python -m json.tool
|
||||
{
|
||||
"json": "obj"
|
||||
}
|
||||
$ echo '{ 1.2:3.4}' | python -m json.tool
|
||||
Expecting property name enclosed in double quotes: line 1 column 3 (char 2)
|
||||
"""
|
||||
__version__ = '2.0.9'
|
||||
__all__ = [
|
||||
'dump', 'dumps', 'load', 'loads',
|
||||
'JSONDecoder', 'JSONEncoder',
|
||||
]
|
||||
|
||||
__author__ = 'Bob Ippolito <bob@redivi.com>'
|
||||
|
||||
from .decoder import JSONDecoder
|
||||
from .encoder import JSONEncoder
|
||||
|
||||
_default_encoder = JSONEncoder(
|
||||
skipkeys=False,
|
||||
ensure_ascii=True,
|
||||
check_circular=True,
|
||||
allow_nan=True,
|
||||
indent=None,
|
||||
separators=None,
|
||||
default=None,
|
||||
)
|
||||
|
||||
def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
        allow_nan=True, cls=None, indent=None, separators=None,
        default=None, sort_keys=False, **kw):
    """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
    ``.write()``-supporting file-like object).

    If ``skipkeys`` is true then ``dict`` keys that are not basic types
    (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped
    instead of raising a ``TypeError``.

    If ``ensure_ascii`` is false, then the strings written to ``fp`` can
    contain non-ASCII characters if they appear in strings contained in
    ``obj``. Otherwise, all such characters are escaped in JSON strings.

    If ``check_circular`` is false, then the circular reference check
    for container types will be skipped and a circular reference will
    result in an ``OverflowError`` (or worse).

    If ``allow_nan`` is false, then it will be a ``ValueError`` to
    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
    in strict compliance of the JSON specification, instead of using the
    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).

    If ``indent`` is a non-negative integer, then JSON array elements and
    object members will be pretty-printed with that indent level. An indent
    level of 0 will only insert newlines. ``None`` is the most compact
    representation. Since the default item separator is ``', '``, the
    output might include trailing whitespace when ``indent`` is specified.
    You can use ``separators=(',', ': ')`` to avoid this.

    If ``separators`` is an ``(item_separator, dict_separator)`` tuple
    then it will be used instead of the default ``(', ', ': ')`` separators.
    ``(',', ':')`` is the most compact JSON representation.

    ``default(obj)`` is a function that should return a serializable version
    of obj or raise TypeError. The default simply raises TypeError.

    If *sort_keys* is ``True`` (default: ``False``), then the output of
    dictionaries will be sorted by key.

    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
    ``.default()`` method to serialize additional types), specify it with
    the ``cls`` kwarg; otherwise ``JSONEncoder`` is used.

    """
    # Fast path: every option is at its default, so the shared module-level
    # encoder can be reused instead of building a fresh one per call.
    use_cached_encoder = (not skipkeys and ensure_ascii and check_circular
                          and allow_nan and cls is None and indent is None
                          and separators is None and default is None
                          and not sort_keys and not kw)
    if use_cached_encoder:
        chunks = _default_encoder.iterencode(obj)
    else:
        encoder_cls = JSONEncoder if cls is None else cls
        chunks = encoder_cls(
            skipkeys=skipkeys, ensure_ascii=ensure_ascii,
            check_circular=check_circular, allow_nan=allow_nan,
            indent=indent, separators=separators,
            default=default, sort_keys=sort_keys, **kw).iterencode(obj)
    # Stream the pieces out one at a time; writelines() would be slightly
    # faster on some interpreters but harder to debug.
    for piece in chunks:
        fp.write(piece)
|
||||
|
||||
|
||||
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
        allow_nan=True, cls=None, indent=None, separators=None,
        default=None, sort_keys=False, **kw):
    """Serialize ``obj`` to a JSON formatted ``str``.

    If ``skipkeys`` is true then ``dict`` keys that are not basic types
    (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped
    instead of raising a ``TypeError``.

    If ``ensure_ascii`` is false, then the return value can contain non-ASCII
    characters if they appear in strings contained in ``obj``. Otherwise, all
    such characters are escaped in JSON strings.

    If ``check_circular`` is false, then the circular reference check
    for container types will be skipped and a circular reference will
    result in an ``OverflowError`` (or worse).

    If ``allow_nan`` is false, then it will be a ``ValueError`` to
    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
    strict compliance of the JSON specification, instead of using the
    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).

    If ``indent`` is a non-negative integer, then JSON array elements and
    object members will be pretty-printed with that indent level. An indent
    level of 0 will only insert newlines. ``None`` is the most compact
    representation. Since the default item separator is ``', '``, the
    output might include trailing whitespace when ``indent`` is specified.
    You can use ``separators=(',', ': ')`` to avoid this.

    If ``separators`` is an ``(item_separator, dict_separator)`` tuple
    then it will be used instead of the default ``(', ', ': ')`` separators.
    ``(',', ':')`` is the most compact JSON representation.

    ``default(obj)`` is a function that should return a serializable version
    of obj or raise TypeError. The default simply raises TypeError.

    If *sort_keys* is ``True`` (default: ``False``), then the output of
    dictionaries will be sorted by key.

    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
    ``.default()`` method to serialize additional types), specify it with
    the ``cls`` kwarg; otherwise ``JSONEncoder`` is used.

    """
    # Fast path: all options at their defaults -> reuse the shared
    # module-level encoder instead of constructing one per call.
    if (not skipkeys and ensure_ascii and
        check_circular and allow_nan and
        cls is None and indent is None and separators is None and
        default is None and not sort_keys and not kw):
        return _default_encoder.encode(obj)
    if cls is None:
        cls = JSONEncoder
    return cls(
        skipkeys=skipkeys, ensure_ascii=ensure_ascii,
        check_circular=check_circular, allow_nan=allow_nan, indent=indent,
        separators=separators, default=default, sort_keys=sort_keys,
        **kw).encode(obj)
|
||||
|
||||
|
||||
_default_decoder = JSONDecoder(object_hook=None, object_pairs_hook=None)
|
||||
|
||||
|
||||
def load(fp, cls=None, object_hook=None, parse_float=None,
        parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
    """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
    a JSON document) to a Python object.

    ``object_hook`` is an optional function that will be called with the
    result of any object literal decode (a ``dict``). The return value of
    ``object_hook`` will be used instead of the ``dict``. This feature
    can be used to implement custom decoders (e.g. JSON-RPC class hinting).

    ``object_pairs_hook`` is an optional function that will be called with the
    result of any object literal decoded with an ordered list of pairs. The
    return value of ``object_pairs_hook`` will be used instead of the ``dict``.
    This feature can be used to implement custom decoders that rely on the
    order that the key and value pairs are decoded (for example,
    collections.OrderedDict will remember the order of insertion). If
    ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority.

    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
    kwarg; otherwise ``JSONDecoder`` is used.

    """
    # Read the whole document up front and delegate all option handling
    # to loads(), which owns the decoder-selection logic.
    document = fp.read()
    return loads(document,
                 cls=cls, object_hook=object_hook,
                 parse_float=parse_float, parse_int=parse_int,
                 parse_constant=parse_constant,
                 object_pairs_hook=object_pairs_hook, **kw)
|
||||
|
||||
|
||||
def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
        parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
    """Deserialize ``s`` (a ``str`` instance containing a JSON
    document) to a Python object.

    ``object_hook`` is an optional function that will be called with the
    result of any object literal decode (a ``dict``). The return value of
    ``object_hook`` will be used instead of the ``dict``. This feature
    can be used to implement custom decoders (e.g. JSON-RPC class hinting).

    ``object_pairs_hook`` is an optional function that will be called with the
    result of any object literal decoded with an ordered list of pairs. The
    return value of ``object_pairs_hook`` will be used instead of the ``dict``.
    This feature can be used to implement custom decoders that rely on the
    order that the key and value pairs are decoded (for example,
    collections.OrderedDict will remember the order of insertion). If
    ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority.

    ``parse_float``, if specified, will be called with the string
    of every JSON float to be decoded. By default this is equivalent to
    float(num_str). This can be used to use another datatype or parser
    for JSON floats (e.g. decimal.Decimal).

    ``parse_int``, if specified, will be called with the string
    of every JSON int to be decoded. By default this is equivalent to
    int(num_str). This can be used to use another datatype or parser
    for JSON integers (e.g. float).

    ``parse_constant``, if specified, will be called with one of the
    following strings: -Infinity, Infinity, NaN, null, true, false.
    This can be used to raise an exception if invalid JSON numbers
    are encountered.

    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
    kwarg; otherwise ``JSONDecoder`` is used.

    The ``encoding`` argument is ignored and deprecated.

    """
    # With no customization at all, reuse the shared module-level decoder.
    customized = (cls is not None or object_hook is not None or
                  parse_int is not None or parse_float is not None or
                  parse_constant is not None or
                  object_pairs_hook is not None or bool(kw))
    if not customized:
        return _default_decoder.decode(s)
    decoder_cls = JSONDecoder if cls is None else cls
    # Only forward the hooks the caller actually supplied, so a custom
    # decoder class keeps its own defaults for everything else.
    overrides = (('object_hook', object_hook),
                 ('object_pairs_hook', object_pairs_hook),
                 ('parse_float', parse_float),
                 ('parse_int', parse_int),
                 ('parse_constant', parse_constant))
    for option, value in overrides:
        if value is not None:
            kw[option] = value
    return decoder_cls(**kw).decode(s)
|
|
@ -0,0 +1,362 @@
|
|||
"""Implementation of JSONDecoder
|
||||
"""
|
||||
import re
|
||||
import sys
|
||||
|
||||
from json import scanner
|
||||
try:
|
||||
from _json import scanstring as c_scanstring
|
||||
except ImportError:
|
||||
c_scanstring = None
|
||||
|
||||
__all__ = ['JSONDecoder']
|
||||
|
||||
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
|
||||
|
||||
NaN, PosInf, NegInf = float('nan'), float('inf'), float('-inf')
|
||||
|
||||
|
||||
def linecol(doc, pos):
    """Map an index *pos* in *doc* to a 1-based (line, column) pair.

    *doc* may be ``str`` or ``bytes``; the newline separator is chosen
    to match its type.
    """
    newline = b'\n' if isinstance(doc, bytes) else '\n'
    lineno = doc.count(newline, 0, pos) + 1
    if lineno == 1:
        # No newline before pos: column is just the offset, 1-based.
        return lineno, pos + 1
    # Column counts from the character after the last preceding newline.
    return lineno, pos - doc.rindex(newline, 0, pos)
|
||||
|
||||
|
||||
def errmsg(msg, doc, pos, end=None):
    """Format a decode-error message with line/column position info.

    With only *pos*, reports a single location; with *end* as well,
    reports a start-end span. Positions are character offsets into *doc*.
    """
    # Note that this function is called from _json
    lineno, colno = linecol(doc, pos)
    if end is None:
        fmt = '{0}: line {1} column {2} (char {3})'
        return fmt.format(msg, lineno, colno, pos)
    endlineno, endcolno = linecol(doc, end)
    fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
    return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
|
||||
|
||||
|
||||
_CONSTANTS = {
|
||||
'-Infinity': NegInf,
|
||||
'Infinity': PosInf,
|
||||
'NaN': NaN,
|
||||
}
|
||||
|
||||
|
||||
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
|
||||
BACKSLASH = {
|
||||
'"': '"', '\\': '\\', '/': '/',
|
||||
'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
|
||||
}
|
||||
|
||||
def py_scanstring(s, end, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    chunks = []
    _append = chunks.append
    # Index of the opening quote, kept for error reporting.
    begin = end - 1
    while 1:
        # STRINGCHUNK matches a run of plain characters followed by one
        # terminator: a closing quote, a backslash, or a control character.
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                raise ValueError(errmsg(msg, s, end))
            else:
                # Non-strict mode: pass the raw control character through.
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            # The input ended right after a backslash.
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: {0!r}".format(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # \uXXXX escape: parse four hex digits.
            esc = s[end + 1:end + 5]
            next_end = end + 5
            if len(esc) != 4:
                msg = "Invalid \\uXXXX escape"
                raise ValueError(errmsg(msg, s, end))
            uni = int(esc, 16)
            # A high surrogate must be followed by a \uXXXX low surrogate;
            # the pair is combined into a single astral code point.
            if 0xd800 <= uni <= 0xdbff:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if not s[end + 5:end + 7] == '\\u':
                    raise ValueError(errmsg(msg, s, end))
                esc2 = s[end + 7:end + 11]
                if len(esc2) != 4:
                    raise ValueError(errmsg(msg, s, end))
                uni2 = int(esc2, 16)
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = chr(uni)

            end = next_end
        _append(char)
    return ''.join(chunks), end
|
||||
|
||||
|
||||
# Use speedup if available
|
||||
scanstring = c_scanstring or py_scanstring
|
||||
|
||||
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
|
||||
WHITESPACE_STR = ' \t\n\r'
|
||||
|
||||
|
||||
def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
        memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object starting just after its '{'.

    ``s_and_end`` is a ``(document, index)`` pair where ``index`` points at
    the first character after the opening brace. Returns a
    ``(result, index)`` pair where ``index`` is just past the closing brace.
    ``result`` is the hooks' return value when ``object_pairs_hook`` or
    ``object_hook`` is set, otherwise a plain ``dict``.
    """
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Backwards compatibility
    if memo is None:
        memo = {}
    # memo interns repeated key strings so duplicates share one object.
    memo_get = memo.setdefault
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end))
    end += 1
    while True:
        key, end = scanstring(s, end, strict)
        key = memo_get(key, key)
        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise ValueError(errmsg("Expecting ':' delimiter", s, end))
        end += 1

        # Skip whitespace between ':' and the value, with fast paths for
        # zero or one whitespace character before falling back to the regex.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs_append((key, value))
        # Look at the character after the value: must be ',' or '}'.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
        # After the comma, the next key's opening quote must follow.
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end - 1))
    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
|
||||
|
||||
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON array starting just after its '['.

    ``s_and_end`` is a ``(document, index)`` pair where ``index`` points at
    the first character after the opening bracket. Returns a
    ``(list, index)`` pair where ``index`` is just past the closing bracket.
    """
    s, end = s_and_end
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        _append(value)
        # After each value the only legal characters are ']' or ','
        # (optionally preceded by whitespace).
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            raise ValueError(errmsg("Expecting ',' delimiter", s, end))
        # Skip whitespace after the comma, with fast paths for zero or one
        # whitespace character before falling back to the regex.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

    return values, end
|
||||
|
||||
|
||||
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    Performs the following translations in decoding by default:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str               |
    +---------------+-------------------+
    | number (int)  | int               |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.

    """

    def __init__(self, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """``object_hook``, if specified, will be called with the result
        of every JSON object decoded and its return value will be used in
        place of the given ``dict``.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        ``object_pairs_hook``, if specified will be called with the result of
        every JSON object decoded with an ordered list of pairs.  The return
        value of ``object_pairs_hook`` will be used instead of the ``dict``.
        This feature can be used to implement custom decoders that rely on the
        order that the key and value pairs are decoded (for example,
        collections.OrderedDict will remember the order of insertion). If
        ``object_hook`` is also defined, the ``object_pairs_hook`` takes
        priority.

        ``parse_float``, if specified, will be called with the string
        of every JSON float to be decoded. By default this is equivalent to
        float(num_str). This can be used to use another datatype or parser
        for JSON floats (e.g. decimal.Decimal).

        ``parse_int``, if specified, will be called with the string
        of every JSON int to be decoded. By default this is equivalent to
        int(num_str). This can be used to use another datatype or parser
        for JSON integers (e.g. float).

        ``parse_constant``, if specified, will be called with one of the
        following strings: -Infinity, Infinity, NaN.
        This can be used to raise an exception if invalid JSON numbers
        are encountered.

        If ``strict`` is false (true is the default), then control
        characters will be allowed inside strings.  Control characters in
        this context are those with character codes in the 0-31 range,
        including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.

        """
        self.object_hook = object_hook
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        self.strict = strict
        self.object_pairs_hook = object_pairs_hook
        # The scanner reads these parse_* attributes when it is built below,
        # so they must all be assigned before make_scanner() is called.
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        # memo is shared with JSONObject to intern repeated key strings.
        self.memo = {}
        self.scan_once = scanner.make_scanner(self)


    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` instance
        containing a JSON document).

        Raises ``ValueError`` if ``s`` has trailing non-whitespace data.
        """
        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
        end = _w(s, end).end()
        if end != len(s):
            raise ValueError(errmsg("Extra data", s, end, len(s)))
        return obj

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` beginning with
        a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration:
            # The scanner signals "no value here" via StopIteration.
            raise ValueError("No JSON object could be decoded")
        return obj, end
|
|
@ -0,0 +1,427 @@
|
|||
"""Implementation of JSONEncoder
|
||||
"""
|
||||
import re
|
||||
|
||||
try:
|
||||
from _json import encode_basestring_ascii as c_encode_basestring_ascii
|
||||
except ImportError:
|
||||
c_encode_basestring_ascii = None
|
||||
try:
|
||||
from _json import make_encoder as c_make_encoder
|
||||
except ImportError:
|
||||
c_make_encoder = None
|
||||
|
||||
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
|
||||
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
|
||||
HAS_UTF8 = re.compile(b'[\x80-\xff]')
|
||||
ESCAPE_DCT = {
|
||||
'\\': '\\\\',
|
||||
'"': '\\"',
|
||||
'\b': '\\b',
|
||||
'\f': '\\f',
|
||||
'\n': '\\n',
|
||||
'\r': '\\r',
|
||||
'\t': '\\t',
|
||||
}
|
||||
for i in range(0x20):
|
||||
ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
|
||||
#ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
|
||||
|
||||
INFINITY = float('inf')
|
||||
FLOAT_REPR = repr
|
||||
|
||||
def encode_basestring(s):
    """Return a JSON string literal for the Python string *s*.

    Only characters that JSON requires to be escaped (quote, backslash,
    control characters) are rewritten; everything else, including
    non-ASCII text, passes through unchanged.
    """
    return '"' + ESCAPE.sub(lambda m: ESCAPE_DCT[m.group(0)], s) + '"'
|
||||
|
||||
|
||||
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON string literal for the Python string *s*.

    Characters with a short escape (quote, backslash, control chars) use
    it; all other non-printable-ASCII characters become ``\\uXXXX``
    escapes, with astral code points emitted as a surrogate pair.
    """
    def escape_match(match):
        ch = match.group(0)
        mapped = ESCAPE_DCT.get(ch)
        if mapped is not None:
            return mapped
        code = ord(ch)
        if code < 0x10000:
            return '\\u{0:04x}'.format(code)
        # Astral character: encode as a UTF-16 surrogate pair.
        code -= 0x10000
        high = 0xd800 | ((code >> 10) & 0x3ff)
        low = 0xdc00 | (code & 0x3ff)
        return '\\u{0:04x}\\u{1:04x}'.format(high, low)
    return '"' + ESCAPE_ASCII.sub(escape_match, s) + '"'
|
||||
|
||||
|
||||
encode_basestring_ascii = (
|
||||
c_encode_basestring_ascii or py_encode_basestring_ascii)
|
||||
|
||||
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str               | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    # Class-level separator defaults; overridden per-instance when the
    # ``separators`` constructor argument is given.
    item_separator = ', '
    key_separator = ': '
    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming non-ASCII characters escaped.  If
        ensure_ascii is false, the output can contain non-ASCII characters.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.  Since the default
        item separator is ', ',  the output might include trailing
        whitespace when indent is specified.  You can use
        separators=(',', ': ') to avoid this.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the default() method on this instance only.
            self.default = default

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                # Let the base class default method raise the TypeError
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, str):
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                # NaN is the only float that compares unequal to itself.
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text


        # Prefer the C accelerator when it is available and the options it
        # supports (one-shot, no indent) are in effect.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
|
||||
|
||||
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
|
||||
_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
|
||||
## HACK: hand-optimized bytecode; turn globals into locals
|
||||
ValueError=ValueError,
|
||||
dict=dict,
|
||||
float=float,
|
||||
id=id,
|
||||
int=int,
|
||||
isinstance=isinstance,
|
||||
list=list,
|
||||
str=str,
|
||||
tuple=tuple,
|
||||
):
|
||||
|
||||
if _indent is not None and not isinstance(_indent, str):
|
||||
_indent = ' ' * _indent
|
||||
|
||||
def _iterencode_list(lst, _current_indent_level):
|
||||
if not lst:
|
||||
yield '[]'
|
||||
return
|
||||
if markers is not None:
|
||||
markerid = id(lst)
|
||||
if markerid in markers:
|
||||
raise ValueError("Circular reference detected")
|
||||
markers[markerid] = lst
|
||||
buf = '['
|
||||
if _indent is not None:
|
||||
_current_indent_level += 1
|
||||
newline_indent = '\n' + _indent * _current_indent_level
|
||||
separator = _item_separator + newline_indent
|
||||
buf += newline_indent
|
||||
else:
|
||||
newline_indent = None
|
||||
separator = _item_separator
|
||||
first = True
|
||||
for value in lst:
|
||||
if first:
|
||||
first = False
|
||||
else:
|
||||
buf = separator
|
||||
if isinstance(value, str):
|
||||
yield buf + _encoder(value)
|
||||
elif value is None:
|
||||
yield buf + 'null'
|
||||
elif value is True:
|
||||
yield buf + 'true'
|
||||
elif value is False:
|
||||
yield buf + 'false'
|
||||
elif isinstance(value, int):
|
||||
yield buf + str(value)
|
||||
elif isinstance(value, float):
|
||||
yield buf + _floatstr(value)
|
||||
else:
|
||||
yield buf
|
||||
if isinstance(value, (list, tuple)):
|
||||
chunks = _iterencode_list(value, _current_indent_level)
|
||||
elif isinstance(value, dict):
|
||||
chunks = _iterencode_dict(value, _current_indent_level)
|
||||
else:
|
||||
chunks = _iterencode(value, _current_indent_level)
|
||||
for chunk in chunks:
|
||||
yield chunk
|
||||
if newline_indent is not None:
|
||||
_current_indent_level -= 1
|
||||
yield '\n' + _indent * _current_indent_level
|
||||
yield ']'
|
||||
if markers is not None:
|
||||
del markers[markerid]
|
||||
|
||||
def _iterencode_dict(dct, _current_indent_level):
|
||||
if not dct:
|
||||
yield '{}'
|
||||
return
|
||||
if markers is not None:
|
||||
markerid = id(dct)
|
||||
if markerid in markers:
|
||||
raise ValueError("Circular reference detected")
|
||||
markers[markerid] = dct
|
||||
yield '{'
|
||||
if _indent is not None:
|
||||
_current_indent_level += 1
|
||||
newline_indent = '\n' + _indent * _current_indent_level
|
||||
item_separator = _item_separator + newline_indent
|
||||
yield newline_indent
|
||||
else:
|
||||
newline_indent = None
|
||||
item_separator = _item_separator
|
||||
first = True
|
||||
if _sort_keys:
|
||||
items = sorted(dct.items(), key=lambda kv: kv[0])
|
||||
else:
|
||||
items = dct.items()
|
||||
for key, value in items:
|
||||
if isinstance(key, str):
|
||||
pass
|
||||
# JavaScript is weakly typed for these, so it makes sense to
|
||||
# also allow them. Many encoders seem to do something like this.
|
||||
elif isinstance(key, float):
|
||||
key = _floatstr(key)
|
||||
elif key is True:
|
||||
key = 'true'
|
||||
elif key is False:
|
||||
key = 'false'
|
||||
elif key is None:
|
||||
key = 'null'
|
||||
elif isinstance(key, int):
|
||||
key = str(key)
|
||||
elif _skipkeys:
|
||||
continue
|
||||
else:
|
||||
raise TypeError("key " + repr(key) + " is not a string")
|
||||
if first:
|
||||
first = False
|
||||
else:
|
||||
yield item_separator
|
||||
yield _encoder(key)
|
||||
yield _key_separator
|
||||
if isinstance(value, str):
|
||||
yield _encoder(value)
|
||||
elif value is None:
|
||||
yield 'null'
|
||||
elif value is True:
|
||||
yield 'true'
|
||||
elif value is False:
|
||||
yield 'false'
|
||||
elif isinstance(value, int):
|
||||
yield str(value)
|
||||
elif isinstance(value, float):
|
||||
yield _floatstr(value)
|
||||
else:
|
||||
if isinstance(value, (list, tuple)):
|
||||
chunks = _iterencode_list(value, _current_indent_level)
|
||||
elif isinstance(value, dict):
|
||||
chunks = _iterencode_dict(value, _current_indent_level)
|
||||
else:
|
||||
chunks = _iterencode(value, _current_indent_level)
|
||||
for chunk in chunks:
|
||||
yield chunk
|
||||
if newline_indent is not None:
|
||||
_current_indent_level -= 1
|
||||
yield '\n' + _indent * _current_indent_level
|
||||
yield '}'
|
||||
if markers is not None:
|
||||
del markers[markerid]
|
||||
|
||||
def _iterencode(o, _current_indent_level):
|
||||
if isinstance(o, str):
|
||||
yield _encoder(o)
|
||||
elif o is None:
|
||||
yield 'null'
|
||||
elif o is True:
|
||||
yield 'true'
|
||||
elif o is False:
|
||||
yield 'false'
|
||||
elif isinstance(o, int):
|
||||
yield str(o)
|
||||
elif isinstance(o, float):
|
||||
yield _floatstr(o)
|
||||
elif isinstance(o, (list, tuple)):
|
||||
for chunk in _iterencode_list(o, _current_indent_level):
|
||||
yield chunk
|
||||
elif isinstance(o, dict):
|
||||
for chunk in _iterencode_dict(o, _current_indent_level):
|
||||
yield chunk
|
||||
else:
|
||||
if markers is not None:
|
||||
markerid = id(o)
|
||||
if markerid in markers:
|
||||
raise ValueError("Circular reference detected")
|
||||
markers[markerid] = o
|
||||
o = _default(o)
|
||||
for chunk in _iterencode(o, _current_indent_level):
|
||||
yield chunk
|
||||
if markers is not None:
|
||||
del markers[markerid]
|
||||
return _iterencode
|
|
@ -0,0 +1,73 @@
|
|||
"""JSON token scanner
|
||||
"""
|
||||
import re
|
||||
try:
|
||||
from _json import make_scanner as c_make_scanner
|
||||
except ImportError:
|
||||
c_make_scanner = None
|
||||
|
||||
__all__ = ['make_scanner']
|
||||
|
||||
NUMBER_RE = re.compile(
|
||||
r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
|
||||
(re.VERBOSE | re.MULTILINE | re.DOTALL))
|
||||
|
||||
def py_make_scanner(context):
|
||||
parse_object = context.parse_object
|
||||
parse_array = context.parse_array
|
||||
parse_string = context.parse_string
|
||||
match_number = NUMBER_RE.match
|
||||
strict = context.strict
|
||||
parse_float = context.parse_float
|
||||
parse_int = context.parse_int
|
||||
parse_constant = context.parse_constant
|
||||
object_hook = context.object_hook
|
||||
object_pairs_hook = context.object_pairs_hook
|
||||
memo = context.memo
|
||||
|
||||
def _scan_once(string, idx):
|
||||
try:
|
||||
nextchar = string[idx]
|
||||
except IndexError:
|
||||
raise StopIteration
|
||||
|
||||
if nextchar == '"':
|
||||
return parse_string(string, idx + 1, strict)
|
||||
elif nextchar == '{':
|
||||
return parse_object((string, idx + 1), strict,
|
||||
_scan_once, object_hook, object_pairs_hook, memo)
|
||||
elif nextchar == '[':
|
||||
return parse_array((string, idx + 1), _scan_once)
|
||||
elif nextchar == 'n' and string[idx:idx + 4] == 'null':
|
||||
return None, idx + 4
|
||||
elif nextchar == 't' and string[idx:idx + 4] == 'true':
|
||||
return True, idx + 4
|
||||
elif nextchar == 'f' and string[idx:idx + 5] == 'false':
|
||||
return False, idx + 5
|
||||
|
||||
m = match_number(string, idx)
|
||||
if m is not None:
|
||||
integer, frac, exp = m.groups()
|
||||
if frac or exp:
|
||||
res = parse_float(integer + (frac or '') + (exp or ''))
|
||||
else:
|
||||
res = parse_int(integer)
|
||||
return res, m.end()
|
||||
elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
|
||||
return parse_constant('NaN'), idx + 3
|
||||
elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
|
||||
return parse_constant('Infinity'), idx + 8
|
||||
elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
|
||||
return parse_constant('-Infinity'), idx + 9
|
||||
else:
|
||||
raise StopIteration
|
||||
|
||||
def scan_once(string, idx):
|
||||
try:
|
||||
return _scan_once(string, idx)
|
||||
finally:
|
||||
memo.clear()
|
||||
|
||||
return _scan_once
|
||||
|
||||
make_scanner = c_make_scanner or py_make_scanner
|
|
@ -0,0 +1,40 @@
|
|||
r"""Command-line tool to validate and pretty-print JSON
|
||||
|
||||
Usage::
|
||||
|
||||
$ echo '{"json":"obj"}' | python -m json.tool
|
||||
{
|
||||
"json": "obj"
|
||||
}
|
||||
$ echo '{ 1.2:3.4}' | python -m json.tool
|
||||
Expecting property name enclosed in double quotes: line 1 column 3 (char 2)
|
||||
|
||||
"""
|
||||
import sys
|
||||
import json
|
||||
|
||||
def main():
|
||||
if len(sys.argv) == 1:
|
||||
infile = sys.stdin
|
||||
outfile = sys.stdout
|
||||
elif len(sys.argv) == 2:
|
||||
infile = open(sys.argv[1], 'r')
|
||||
outfile = sys.stdout
|
||||
elif len(sys.argv) == 3:
|
||||
infile = open(sys.argv[1], 'r')
|
||||
outfile = open(sys.argv[2], 'w')
|
||||
else:
|
||||
raise SystemExit(sys.argv[0] + " [infile [outfile]]")
|
||||
with infile:
|
||||
try:
|
||||
obj = json.load(infile)
|
||||
except ValueError as e:
|
||||
raise SystemExit(e)
|
||||
with outfile:
|
||||
json.dump(obj, outfile, sort_keys=True,
|
||||
indent=4, separators=(',', ': '))
|
||||
outfile.write('\n')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -0,0 +1,94 @@
|
|||
#! /usr/bin/env python3
|
||||
|
||||
"""Keywords (from "graminit.c")
|
||||
|
||||
This file is automatically generated; please don't muck it up!
|
||||
|
||||
To update the symbols in this file, 'cd' to the top directory of
|
||||
the python source tree after building the interpreter and run:
|
||||
|
||||
./python Lib/keyword.py
|
||||
"""
|
||||
|
||||
__all__ = ["iskeyword", "kwlist"]
|
||||
|
||||
kwlist = [
|
||||
#--start keywords--
|
||||
'False',
|
||||
'None',
|
||||
'True',
|
||||
'and',
|
||||
'as',
|
||||
'assert',
|
||||
'break',
|
||||
'class',
|
||||
'continue',
|
||||
'def',
|
||||
'del',
|
||||
'elif',
|
||||
'else',
|
||||
'except',
|
||||
'finally',
|
||||
'for',
|
||||
'from',
|
||||
'global',
|
||||
'if',
|
||||
'import',
|
||||
'in',
|
||||
'is',
|
||||
'lambda',
|
||||
'nonlocal',
|
||||
'not',
|
||||
'or',
|
||||
'pass',
|
||||
'raise',
|
||||
'return',
|
||||
'try',
|
||||
'while',
|
||||
'with',
|
||||
'yield',
|
||||
#--end keywords--
|
||||
]
|
||||
|
||||
frozenset = set
|
||||
iskeyword = frozenset(kwlist).__contains__
|
||||
|
||||
def main():
|
||||
import sys, re
|
||||
|
||||
args = sys.argv[1:]
|
||||
iptfile = args and args[0] or "Python/graminit.c"
|
||||
if len(args) > 1: optfile = args[1]
|
||||
else: optfile = "Lib/keyword.py"
|
||||
|
||||
# scan the source file for keywords
|
||||
with open(iptfile) as fp:
|
||||
strprog = re.compile('"([^"]+)"')
|
||||
lines = []
|
||||
for line in fp:
|
||||
if '{1, "' in line:
|
||||
match = strprog.search(line)
|
||||
if match:
|
||||
lines.append(" '" + match.group(1) + "',\n")
|
||||
lines.sort()
|
||||
|
||||
# load the output skeleton from the target
|
||||
with open(optfile) as fp:
|
||||
format = fp.readlines()
|
||||
|
||||
# insert the lines of keywords
|
||||
try:
|
||||
start = format.index("#--start keywords--\n") + 1
|
||||
end = format.index("#--end keywords--\n")
|
||||
format[start:end] = lines
|
||||
except ValueError:
|
||||
sys.stderr.write("target does not contain format markers\n")
|
||||
sys.exit(1)
|
||||
|
||||
# write the output file
|
||||
fp = open(optfile, 'w')
|
||||
fp.write(''.join(format))
|
||||
fp.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1 @@
|
|||
cache = {}
|
|
@ -0,0 +1,2 @@
|
|||
def getpreferredencoding():
|
||||
return "utf-8"
|
|
@ -0,0 +1,94 @@
|
|||
import sys
|
||||
|
||||
CRITICAL = 50
|
||||
ERROR = 40
|
||||
WARNING = 30
|
||||
INFO = 20
|
||||
DEBUG = 10
|
||||
NOTSET = 0
|
||||
|
||||
_level_dict = {
|
||||
CRITICAL: "CRIT",
|
||||
ERROR: "ERROR",
|
||||
WARNING: "WARN",
|
||||
INFO: "INFO",
|
||||
DEBUG: "DEBUG",
|
||||
}
|
||||
|
||||
_stream = sys.stderr
|
||||
|
||||
class Logger:
|
||||
|
||||
level = NOTSET
|
||||
|
||||
def __init__(self, name):
|
||||
self.name = name
|
||||
|
||||
def _level_str(self, level):
|
||||
l = _level_dict.get(level)
|
||||
if l is not None:
|
||||
return l
|
||||
return "LVL%s" % level
|
||||
|
||||
def setLevel(self, level):
|
||||
self.level = level
|
||||
|
||||
def isEnabledFor(self, level):
|
||||
return level >= (self.level or _level)
|
||||
|
||||
def log(self, level, msg, *args):
|
||||
if level >= (self.level or _level):
|
||||
_stream.write("%s:%s:" % (self._level_str(level), self.name))
|
||||
if not args:
|
||||
print(msg, file=_stream)
|
||||
else:
|
||||
print(msg % args, file=_stream)
|
||||
|
||||
def debug(self, msg, *args):
|
||||
self.log(DEBUG, msg, *args)
|
||||
|
||||
def info(self, msg, *args):
|
||||
self.log(INFO, msg, *args)
|
||||
|
||||
def warning(self, msg, *args):
|
||||
self.log(WARNING, msg, *args)
|
||||
|
||||
def error(self, msg, *args):
|
||||
self.log(ERROR, msg, *args)
|
||||
|
||||
def critical(self, msg, *args):
|
||||
self.log(CRITICAL, msg, *args)
|
||||
|
||||
def exc(self, e, msg, *args):
|
||||
self.log(ERROR, msg, *args)
|
||||
sys.print_exception(e, _stream)
|
||||
|
||||
def exception(self, msg, *args):
|
||||
self.exc(sys.exc_info()[1], msg, *args)
|
||||
|
||||
|
||||
_level = INFO
|
||||
_loggers = {}
|
||||
|
||||
def getLogger(name):
|
||||
if name in _loggers:
|
||||
return _loggers[name]
|
||||
l = Logger(name)
|
||||
_loggers[name] = l
|
||||
return l
|
||||
|
||||
def info(msg, *args):
|
||||
getLogger(None).info(msg, *args)
|
||||
|
||||
def debug(msg, *args):
|
||||
getLogger(None).debug(msg, *args)
|
||||
|
||||
def basicConfig(level=INFO, filename=None, stream=None, format=None):
|
||||
global _level, _stream
|
||||
_level = level
|
||||
if stream:
|
||||
_stream = stream
|
||||
if filename is not None:
|
||||
print("logging.basicConfig: filename arg is not supported")
|
||||
if format is not None:
|
||||
print("logging.basicConfig: format arg is not supported")
|
|
@ -0,0 +1,6 @@
|
|||
from umachine import *
|
||||
from .timer import *
|
||||
from .pin import *
|
||||
|
||||
def unique_id():
|
||||
return b"upy-non-unique"
|
|
@ -0,0 +1,28 @@
|
|||
import umachine
|
||||
|
||||
class Pin(umachine.PinBase):
|
||||
|
||||
IN = "in"
|
||||
OUT = "out"
|
||||
|
||||
def __init__(self, no, dir=IN):
|
||||
pref = "/sys/class/gpio/gpio{}/".format(no)
|
||||
dirf = pref + "direction"
|
||||
try:
|
||||
f = open(dirf, "w")
|
||||
except OSError:
|
||||
with open("/sys/class/gpio/export", "w") as f:
|
||||
f.write(str(no))
|
||||
f = open(dirf, "w")
|
||||
f.write(dir)
|
||||
f.close()
|
||||
self.f = open(pref + "value", "r+b")
|
||||
|
||||
def value(self, v=None):
|
||||
if v is None:
|
||||
self.f.seek(0)
|
||||
return 1 if self.f.read(1) == b"1" else 0
|
||||
self.f.write(b"1" if v else b"0")
|
||||
|
||||
def deinit(self):
|
||||
self.f.close()
|
|
@ -0,0 +1,89 @@
|
|||
import ffilib
|
||||
import uctypes
|
||||
import array
|
||||
import uos
|
||||
import os
|
||||
import utime
|
||||
from signal import *
|
||||
|
||||
libc = ffilib.libc()
|
||||
librt = ffilib.open("librt")
|
||||
|
||||
CLOCK_REALTIME = 0
|
||||
CLOCK_MONOTONIC = 1
|
||||
SIGEV_SIGNAL = 0
|
||||
|
||||
sigval_t = {
|
||||
"sival_int": uctypes.INT32 | 0,
|
||||
"sival_ptr": (uctypes.PTR | 0, uctypes.UINT8),
|
||||
}
|
||||
|
||||
sigevent_t = {
|
||||
"sigev_value": (0, sigval_t),
|
||||
"sigev_signo": uctypes.INT32 | 8,
|
||||
"sigev_notify": uctypes.INT32 | 12,
|
||||
}
|
||||
|
||||
timespec_t = {
|
||||
"tv_sec": uctypes.INT32 | 0,
|
||||
"tv_nsec": uctypes.INT64 | 8,
|
||||
}
|
||||
|
||||
itimerspec_t = {
|
||||
"it_interval": (0, timespec_t),
|
||||
"it_value": (16, timespec_t),
|
||||
}
|
||||
|
||||
|
||||
__libc_current_sigrtmin = libc.func("i", "__libc_current_sigrtmin", "")
|
||||
SIGRTMIN = __libc_current_sigrtmin()
|
||||
|
||||
timer_create_ = librt.func("i", "timer_create", "ipp")
|
||||
timer_settime_ = librt.func("i", "timer_settime", "PiPp")
|
||||
|
||||
def new(sdesc):
|
||||
buf = bytearray(uctypes.sizeof(sdesc))
|
||||
s = uctypes.struct(uctypes.addressof(buf), sdesc, uctypes.NATIVE)
|
||||
return s
|
||||
|
||||
def timer_create(sig_id):
|
||||
sev = new(sigevent_t)
|
||||
#print(sev)
|
||||
sev.sigev_notify = SIGEV_SIGNAL
|
||||
sev.sigev_signo = SIGRTMIN + sig_id
|
||||
timerid = array.array('P', [0])
|
||||
r = timer_create_(CLOCK_MONOTONIC, sev, timerid)
|
||||
os.check_error(r)
|
||||
#print("timerid", hex(timerid[0]))
|
||||
return timerid[0]
|
||||
|
||||
def timer_settime(tid, hz):
|
||||
period = 1000000000 // hz
|
||||
new_val = new(itimerspec_t)
|
||||
new_val.it_value.tv_nsec = period
|
||||
new_val.it_interval.tv_nsec = period
|
||||
#print("new_val:", bytes(new_val))
|
||||
old_val = new(itimerspec_t)
|
||||
#print(new_val, old_val)
|
||||
r = timer_settime_(tid, 0, new_val, old_val)
|
||||
os.check_error(r)
|
||||
#print("old_val:", bytes(old_val))
|
||||
#print("timer_settime", r)
|
||||
|
||||
|
||||
class Timer:
|
||||
|
||||
def __init__(self, id, freq):
|
||||
self.id = id
|
||||
self.tid = timer_create(id)
|
||||
self.freq = freq
|
||||
|
||||
def callback(self, cb):
|
||||
self.cb = cb
|
||||
timer_settime(self.tid, self.freq)
|
||||
org_sig = signal(SIGRTMIN + self.id, self.handler)
|
||||
#print("Sig %d: %s" % (SIGRTMIN + self.id, org_sig))
|
||||
|
||||
def handler(self, signum):
|
||||
#print('Signal handler called with signal', signum)
|
||||
self.cb(self)
|
|
@ -0,0 +1,117 @@
|
|||
import os
|
||||
import pickle
|
||||
import select
|
||||
|
||||
|
||||
class Process:
|
||||
|
||||
def __init__(self, group=None, target=None, name=None, args=(), kwargs={}):
|
||||
self.target = target
|
||||
self.args = args
|
||||
self.kwargs = kwargs
|
||||
self.pid = 0
|
||||
self.r = self.w = None
|
||||
|
||||
def start(self):
|
||||
self.pid = os.fork()
|
||||
if not self.pid:
|
||||
if self.r:
|
||||
self.r.close()
|
||||
self.target(*self.args, **self.kwargs)
|
||||
os._exit(0)
|
||||
else:
|
||||
if self.w:
|
||||
self.w.close()
|
||||
return
|
||||
|
||||
def join(self):
|
||||
os.waitpid(self.pid, 0)
|
||||
|
||||
def register_pipe(self, r, w):
|
||||
"""Extension to CPython API: any pipe used for parent/child
|
||||
communication should be registered with this function."""
|
||||
self.r, self.w = r, w
|
||||
|
||||
|
||||
class Connection:
|
||||
|
||||
def __init__(self, fd):
|
||||
self.fd = fd
|
||||
self.f = open(fd)
|
||||
|
||||
def __repr__(self):
|
||||
return "<Connection %s>" % self.f
|
||||
|
||||
def send(self, obj):
|
||||
s = pickle.dumps(obj)
|
||||
self.f.write(len(s).to_bytes(4, "little"))
|
||||
self.f.write(s)
|
||||
|
||||
def recv(self):
|
||||
s = self.f.read(4)
|
||||
if not s:
|
||||
raise EOFError
|
||||
l = int.from_bytes(s, "little")
|
||||
s = self.f.read(l)
|
||||
if not s:
|
||||
raise EOFError
|
||||
return pickle.loads(s)
|
||||
|
||||
def close(self):
|
||||
self.f.close()
|
||||
|
||||
|
||||
def Pipe(duplex=True):
|
||||
assert duplex == False
|
||||
r, w = os.pipe()
|
||||
return Connection(r), Connection(w)
|
||||
|
||||
|
||||
class AsyncResult:
|
||||
|
||||
def __init__(self, p, r):
|
||||
self.p = p
|
||||
self.r = r
|
||||
self.ep = None
|
||||
|
||||
def get(self):
|
||||
res = self.r.recv()
|
||||
self.p.join()
|
||||
return res
|
||||
|
||||
def ready(self):
|
||||
if not self.ep:
|
||||
self.ep = select.epoll()
|
||||
self.ep.register(self.r.f.fileno(), select.EPOLLIN, None)
|
||||
res = self.ep.poll(0)
|
||||
if res:
|
||||
self.ep.close()
|
||||
return bool(res)
|
||||
|
||||
|
||||
class Pool:
|
||||
|
||||
def __init__(self, num):
|
||||
self.num = num
|
||||
|
||||
def _apply(self, f, args, kwargs):
|
||||
# This is pretty inefficient impl, doesn't really use pool worker
|
||||
def _exec(w):
|
||||
r = f(*args, **kwargs)
|
||||
w.send(r)
|
||||
r, w = Pipe(False)
|
||||
p = Process(target=_exec, args=(w,))
|
||||
p.register_pipe(r, w)
|
||||
p.start()
|
||||
return p, r
|
||||
|
||||
|
||||
def apply(self, f, args=(), kwargs={}):
|
||||
p, r = self._apply(f, args, kwargs)
|
||||
res = r.recv()
|
||||
p.join()
|
||||
return res
|
||||
|
||||
def apply_async(self, f, args=(), kwargs={}, callback=None, errback=None):
|
||||
p, r = self._apply(f, args, kwargs)
|
||||
return AsyncResult(p, r)
|
|
@ -0,0 +1,33 @@
|
|||
def attrgetter(attr):
|
||||
assert "." not in attr
|
||||
def _attrgetter(obj):
|
||||
return getattr(obj, attr)
|
||||
return _attrgetter
|
||||
|
||||
|
||||
def lt(a, b):
|
||||
return a < b
|
||||
|
||||
def le(a, b):
|
||||
return a <= b
|
||||
|
||||
def gt(a, b):
|
||||
return a > b
|
||||
|
||||
def ge(a, b):
|
||||
return a >= b
|
||||
|
||||
def eq(a, b):
|
||||
return a == b
|
||||
|
||||
def ne(a, b):
|
||||
return a != b
|
||||
|
||||
def mod(a, b):
|
||||
return a % b
|
||||
|
||||
def truediv(a, b):
|
||||
return a / b
|
||||
|
||||
def floordiv(a, b):
|
||||
return a // b
|
|
@ -0,0 +1,280 @@
|
|||
import array
|
||||
import ustruct as struct
|
||||
import errno as errno_
|
||||
import stat as stat_
|
||||
import ffilib
|
||||
import uos
|
||||
|
||||
R_OK = const(4)
|
||||
W_OK = const(2)
|
||||
X_OK = const(1)
|
||||
F_OK = const(0)
|
||||
|
||||
O_ACCMODE = 0o0000003
|
||||
O_RDONLY = 0o0000000
|
||||
O_WRONLY = 0o0000001
|
||||
O_RDWR = 0o0000002
|
||||
O_CREAT = 0o0000100
|
||||
O_EXCL = 0o0000200
|
||||
O_NOCTTY = 0o0000400
|
||||
O_TRUNC = 0o0001000
|
||||
O_APPEND = 0o0002000
|
||||
O_NONBLOCK = 0o0004000
|
||||
|
||||
error = OSError
|
||||
name = "posix"
|
||||
sep = "/"
|
||||
curdir = "."
|
||||
pardir = ".."
|
||||
environ = {"WARNING": "NOT_IMPLEMENTED"}
|
||||
|
||||
|
||||
libc = ffilib.libc()
|
||||
|
||||
if libc:
|
||||
chdir_ = libc.func("i", "chdir", "s")
|
||||
mkdir_ = libc.func("i", "mkdir", "si")
|
||||
rename_ = libc.func("i", "rename", "ss")
|
||||
unlink_ = libc.func("i", "unlink", "s")
|
||||
rmdir_ = libc.func("i", "rmdir", "s")
|
||||
getcwd_ = libc.func("s", "getcwd", "si")
|
||||
opendir_ = libc.func("P", "opendir", "s")
|
||||
readdir_ = libc.func("P", "readdir", "P")
|
||||
open_ = libc.func("i", "open", "sii")
|
||||
read_ = libc.func("i", "read", "ipi")
|
||||
write_ = libc.func("i", "write", "iPi")
|
||||
close_ = libc.func("i", "close", "i")
|
||||
dup_ = libc.func("i", "dup", "i")
|
||||
access_ = libc.func("i", "access", "si")
|
||||
fork_ = libc.func("i", "fork", "")
|
||||
pipe_ = libc.func("i", "pipe", "p")
|
||||
_exit_ = libc.func("v", "_exit", "i")
|
||||
getpid_ = libc.func("i", "getpid", "")
|
||||
waitpid_ = libc.func("i", "waitpid", "ipi")
|
||||
system_ = libc.func("i", "system", "s")
|
||||
execvp_ = libc.func("i", "execvp", "PP")
|
||||
kill_ = libc.func("i", "kill", "ii")
|
||||
getenv_ = libc.func("s", "getenv", "P")
|
||||
|
||||
|
||||
|
||||
def check_error(ret):
|
||||
# Return True is error was EINTR (which usually means that OS call
|
||||
# should be restarted).
|
||||
if ret == -1:
|
||||
e = uos.errno()
|
||||
if e == errno_.EINTR:
|
||||
return True
|
||||
raise OSError(e)
|
||||
|
||||
def raise_error():
|
||||
raise OSError(uos.errno())
|
||||
|
||||
stat = uos.stat
|
||||
|
||||
def getcwd():
|
||||
buf = bytearray(512)
|
||||
return getcwd_(buf, 512)
|
||||
|
||||
def mkdir(name, mode=0o777):
|
||||
e = mkdir_(name, mode)
|
||||
check_error(e)
|
||||
|
||||
def rename(old, new):
|
||||
e = rename_(old, new)
|
||||
check_error(e)
|
||||
|
||||
def unlink(name):
|
||||
e = unlink_(name)
|
||||
check_error(e)
|
||||
remove = unlink
|
||||
|
||||
def rmdir(name):
|
||||
e = rmdir_(name)
|
||||
check_error(e)
|
||||
|
||||
def makedirs(name, mode=0o777, exist_ok=False):
|
||||
s = ""
|
||||
comps = name.split("/")
|
||||
if comps[-1] == "":
|
||||
comps.pop()
|
||||
for i, c in enumerate(comps):
|
||||
s += c + "/"
|
||||
try:
|
||||
uos.mkdir(s)
|
||||
except OSError as e:
|
||||
if e.args[0] != errno_.EEXIST:
|
||||
raise
|
||||
if i == len(comps) - 1:
|
||||
if exist_ok:
|
||||
return
|
||||
raise e
|
||||
|
||||
if hasattr(uos, "ilistdir"):
|
||||
ilistdir = uos.ilistdir
|
||||
else:
|
||||
def ilistdir(path="."):
|
||||
dir = opendir_(path)
|
||||
if not dir:
|
||||
raise_error()
|
||||
res = []
|
||||
dirent_fmt = "LLHB256s"
|
||||
while True:
|
||||
dirent = readdir_(dir)
|
||||
if not dirent:
|
||||
break
|
||||
import uctypes
|
||||
dirent = uctypes.bytes_at(dirent, struct.calcsize(dirent_fmt))
|
||||
dirent = struct.unpack(dirent_fmt, dirent)
|
||||
dirent = (dirent[-1].split(b'\0', 1)[0], dirent[-2], dirent[0])
|
||||
yield dirent
|
||||
|
||||
def listdir(path="."):
|
||||
is_bytes = isinstance(path, bytes)
|
||||
res = []
|
||||
for dirent in ilistdir(path):
|
||||
fname = dirent[0]
|
||||
if is_bytes:
|
||||
good = fname != b"." and fname == b".."
|
||||
else:
|
||||
good = fname != "." and fname != ".."
|
||||
if good:
|
||||
if not is_bytes:
|
||||
fname = fsdecode(fname)
|
||||
res.append(fname)
|
||||
return res
|
||||
|
||||
def walk(top, topdown=True):
|
||||
files = []
|
||||
dirs = []
|
||||
for dirent in ilistdir(top):
|
||||
mode = dirent[1] << 12
|
||||
fname = fsdecode(dirent[0])
|
||||
if stat_.S_ISDIR(mode):
|
||||
if fname != "." and fname != "..":
|
||||
dirs.append(fname)
|
||||
else:
|
||||
files.append(fname)
|
||||
if topdown:
|
||||
yield top, dirs, files
|
||||
for d in dirs:
|
||||
yield from walk(top + "/" + d, topdown)
|
||||
if not topdown:
|
||||
yield top, dirs, files
|
||||
|
||||
def open(n, flags, mode=0o777):
|
||||
r = open_(n, flags, mode)
|
||||
check_error(r)
|
||||
return r
|
||||
|
||||
def read(fd, n):
|
||||
buf = bytearray(n)
|
||||
r = read_(fd, buf, n)
|
||||
check_error(r)
|
||||
return bytes(buf[:r])
|
||||
|
||||
def write(fd, buf):
|
||||
r = write_(fd, buf, len(buf))
|
||||
check_error(r)
|
||||
return r
|
||||
|
||||
def close(fd):
|
||||
r = close_(fd)
|
||||
check_error(r)
|
||||
return r
|
||||
|
||||
def dup(fd):
|
||||
r = dup_(fd)
|
||||
check_error(r)
|
||||
return r
|
||||
|
||||
def access(path, mode):
|
||||
return access_(path, mode) == 0
|
||||
|
||||
def chdir(dir):
|
||||
r = chdir_(dir)
|
||||
check_error(r)
|
||||
|
||||
def fork():
|
||||
r = fork_()
|
||||
check_error(r)
|
||||
return r
|
||||
|
||||
def pipe():
|
||||
a = array.array('i', [0, 0])
|
||||
r = pipe_(a)
|
||||
check_error(r)
|
||||
return a[0], a[1]
|
||||
|
||||
def _exit(n):
|
||||
_exit_(n)
|
||||
|
||||
def execvp(f, args):
|
||||
import uctypes
|
||||
args_ = array.array("P", [0] * (len(args) + 1))
|
||||
i = 0
|
||||
for a in args:
|
||||
args_[i] = uctypes.addressof(a)
|
||||
i += 1
|
||||
r = execvp_(f, uctypes.addressof(args_))
|
||||
check_error(r)
|
||||
|
||||
def getpid():
|
||||
return getpid_()
|
||||
|
||||
def waitpid(pid, opts):
|
||||
a = array.array('i', [0])
|
||||
r = waitpid_(pid, a, opts)
|
||||
check_error(r)
|
||||
return (r, a[0])
|
||||
|
||||
def kill(pid, sig):
|
||||
r = kill_(pid, sig)
|
||||
check_error(r)
|
||||
|
||||
def system(command):
|
||||
r = system_(command)
|
||||
check_error(r)
|
||||
return r
|
||||
|
||||
def getenv(var, default=None):
|
||||
var = getenv_(var)
|
||||
if var is None:
|
||||
return default
|
||||
return var
|
||||
|
||||
def fsencode(s):
|
||||
if type(s) is bytes:
|
||||
return s
|
||||
return bytes(s, "utf-8")
|
||||
|
||||
def fsdecode(s):
|
||||
if type(s) is str:
|
||||
return s
|
||||
return str(s, "utf-8")
|
||||
|
||||
|
||||
def urandom(n):
|
||||
import builtins
|
||||
with builtins.open("/dev/urandom", "rb") as f:
|
||||
return f.read(n)
|
||||
|
||||
def popen(cmd, mode="r"):
|
||||
import builtins
|
||||
i, o = pipe()
|
||||
if mode[0] == "w":
|
||||
i, o = o, i
|
||||
pid = fork()
|
||||
if not pid:
|
||||
if mode[0] == "r":
|
||||
close(1)
|
||||
else:
|
||||
close(0)
|
||||
close(i)
|
||||
dup(o)
|
||||
close(o)
|
||||
s = system(cmd)
|
||||
_exit(s)
|
||||
else:
|
||||
close(o)
|
||||
return builtins.open(i, mode)
|
|
@ -0,0 +1,63 @@
|
|||
import os
|
||||
|
||||
|
||||
sep = "/"
|
||||
|
||||
def normcase(s):
|
||||
return s
|
||||
|
||||
def normpath(s):
|
||||
return s
|
||||
|
||||
def abspath(s):
|
||||
if s[0] != "/":
|
||||
return os.getcwd() + "/" + s
|
||||
return s
|
||||
|
||||
def join(*args):
|
||||
# TODO: this is non-compliant
|
||||
if type(args[0]) is bytes:
|
||||
return b"/".join(args)
|
||||
else:
|
||||
return "/".join(args)
|
||||
|
||||
def split(path):
|
||||
if path == "":
|
||||
return ("", "")
|
||||
r = path.rsplit("/", 1)
|
||||
if len(r) == 1:
|
||||
return ("", path)
|
||||
head = r[0] #.rstrip("/")
|
||||
if not head:
|
||||
head = "/"
|
||||
return (head, r[1])
|
||||
|
||||
def dirname(path):
|
||||
return split(path)[0]
|
||||
|
||||
def basename(path):
|
||||
return split(path)[1]
|
||||
|
||||
def exists(path):
|
||||
return os.access(path, os.F_OK)
|
||||
|
||||
# TODO
|
||||
lexists = exists
|
||||
|
||||
def isdir(path):
|
||||
import stat
|
||||
try:
|
||||
mode = os.stat(path)[0]
|
||||
return stat.S_ISDIR(mode)
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
|
||||
def expanduser(s):
|
||||
if s == "~" or s.startswith("~/"):
|
||||
h = os.getenv("HOME")
|
||||
return h + s[1:]
|
||||
if s[0] == "~":
|
||||
# Sorry folks, follow conventions
|
||||
return "/home/" + s[1:]
|
||||
return s
|
|
@ -0,0 +1,22 @@
|
|||
HIGHEST_PROTOCOL = 0
|
||||
|
||||
def dump(obj, f, proto=0):
|
||||
f.write(repr(obj))
|
||||
|
||||
def dumps(obj, proto=0):
|
||||
return repr(obj).encode()
|
||||
|
||||
def load(f):
|
||||
s = f.read()
|
||||
return loads(s)
|
||||
|
||||
def loads(s):
|
||||
d = {}
|
||||
s = s.decode()
|
||||
if "(" in s:
|
||||
qualname = s.split("(", 1)[0]
|
||||
if "." in qualname:
|
||||
pkg = qualname.rsplit(".", 1)[0]
|
||||
mod = __import__(pkg)
|
||||
d[pkg] = mod
|
||||
return eval(s, d)
|
|
@ -0,0 +1,27 @@
|
|||
import uio
|
||||
|
||||
c = {}
|
||||
|
||||
def resource_stream(package, resource):
|
||||
if package not in c:
|
||||
try:
|
||||
if package:
|
||||
p = __import__(package + ".R", None, None, True)
|
||||
else:
|
||||
p = __import__("R")
|
||||
c[package] = p.R
|
||||
except ImportError:
|
||||
if package:
|
||||
p = __import__(package)
|
||||
d = p.__path__
|
||||
else:
|
||||
d = "."
|
||||
# if d[0] != "/":
|
||||
# import uos
|
||||
# d = uos.getcwd() + "/" + d
|
||||
c[package] = d + "/"
|
||||
|
||||
p = c[package]
|
||||
if isinstance(p, dict):
|
||||
return uio.BytesIO(p[resource])
|
||||
return open(p + resource, "rb")
|
|
@ -0,0 +1,8 @@
|
|||
import pkg_resources
|
||||
|
||||
def get_data(package, resource):
|
||||
f = pkg_resources.resource_stream(package, resource)
|
||||
try:
|
||||
return f.read()
|
||||
finally:
|
||||
f.close()
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue