* Add upip dependencies

* Add firmware updater
* Add urlencode lib
* Add tests for http library
* Support form encoding in http post
dont-delete-test-download-branch
Marek Ventur 2018-07-27 23:35:39 +01:00
parent 992d00dc0f
commit 1fd7f5c0db
155 changed files with 27456 additions and 71 deletions

View File

@ -6,6 +6,7 @@ _pyb = None
def get_pyb(args):
global _pyb
if not _pyb:
print("Connected to badge:", end="")
if not args.device:
args.device = find_tty()
@ -13,10 +14,10 @@ def get_pyb(args):
try:
_pyb = Pyboard(args.device, args.baudrate, None, None, args.wait)
except PyboardError as er:
print(" FAIL")
print(er)
sys.exit(1)
print("Connected to badge.")
print(" DONE")
return _pyb
def close_pyb():
@ -24,10 +25,13 @@ def close_pyb():
if _pyb:
_pyb.close()
def stop_badge(args):
def stop_badge(args, verbose):
pyb = get_pyb(args)
print("stopping running app")
if verbose:
print("Stopping running app:", end="")
write_command(pyb, b'\r\x03\x03') # ctrl-C twice: interrupt any running program
if verbose:
print(" DONE")
def write_command(pyb, command):
flush_input(pyb)
@ -42,14 +46,15 @@ def flush_input(pyb):
def soft_reset(args):
pyb = get_pyb(args)
print("trying to soft reboot badge")
print("Soft reboot:", end="")
write_command(pyb, b'\x04') # ctrl-D: soft reset
#print("1")
data = pyb.read_until(1, b'soft reboot\r\n')
#print("2")
if data.endswith(b'soft reboot\r\n'):
print("Soft reboot was successful.")
print(" DONE")
else:
print(" FAIL")
raise PyboardError('could not soft reboot')
def find_tty():
@ -60,10 +65,17 @@ def find_tty():
print("Couldn't find badge tty - Please make it's plugged in and reset it if necessary")
sys.exit(1)
def check_run(args):
if args.command is not None or len(args.paths):
for filename in args.paths:
with open(filename, 'r') as f:
pyfile = f.read()
compile(pyfile + '\n', filename, 'exec')
def run(args):
pyb = get_pyb(args)
print("executing %s" % args.paths)
print("----------------")
print("Preparing execution:", end="")
# run any command or file(s) - this is mostly a copy from pyboard.py
if args.command is not None or len(args.paths):
# we must enter raw-REPL mode to execute commands
@ -71,9 +83,11 @@ def run(args):
try:
pyb.enter_raw_repl()
except PyboardError as er:
print(" FAIL")
print(er)
pyb.close()
sys.exit(1)
print(" DONE")
def execbuffer(buf):
try:
@ -93,6 +107,7 @@ def run(args):
# run any files
for filename in args.paths:
with open(filename, 'rb') as f:
print("-------- %s --------" % filename)
pyfile = f.read()
execbuffer(pyfile)

543
.development/pydfu.py Normal file
View File

@ -0,0 +1,543 @@
#!/usr/bin/env python
# This file is part of the OpenMV project.
# Copyright (c) 2013/2014 Ibrahim Abdelkader <i.abdalkader@gmail.com>
# This work is licensed under the MIT license, see the file LICENSE for
# details.
"""This module implements enough functionality to program the STM32F4xx over
DFU, without requiring dfu-util.
See app note AN3156 for a description of the DFU protocol.
See document UM0391 for a dscription of the DFuse file.
"""
from __future__ import print_function
import argparse
import re
import struct
import sys
import usb.core
import usb.util
import zlib
# VID/PID
__VID = 0x0483
__PID = 0xdf11
# USB request __TIMEOUT
__TIMEOUT = 4000
# DFU commands
__DFU_DETACH = 0
__DFU_DNLOAD = 1
__DFU_UPLOAD = 2
__DFU_GETSTATUS = 3
__DFU_CLRSTATUS = 4
__DFU_GETSTATE = 5
__DFU_ABORT = 6
# DFU status
__DFU_STATE_APP_IDLE = 0x00
__DFU_STATE_APP_DETACH = 0x01
__DFU_STATE_DFU_IDLE = 0x02
__DFU_STATE_DFU_DOWNLOAD_SYNC = 0x03
__DFU_STATE_DFU_DOWNLOAD_BUSY = 0x04
__DFU_STATE_DFU_DOWNLOAD_IDLE = 0x05
__DFU_STATE_DFU_MANIFEST_SYNC = 0x06
__DFU_STATE_DFU_MANIFEST = 0x07
__DFU_STATE_DFU_MANIFEST_WAIT_RESET = 0x08
__DFU_STATE_DFU_UPLOAD_IDLE = 0x09
__DFU_STATE_DFU_ERROR = 0x0a
_DFU_DESCRIPTOR_TYPE = 0x21
# USB device handle
__dev = None
__verbose = None
# USB DFU interface
__DFU_INTERFACE = 0
import inspect
if 'length' in inspect.getfullargspec(usb.util.get_string).args:
# PyUSB 1.0.0.b1 has the length argument
def get_string(dev, index):
return usb.util.get_string(dev, 255, index)
else:
# PyUSB 1.0.0.b2 dropped the length argument
def get_string(dev, index):
return usb.util.get_string(dev, index)
def init():
"""Initializes the found DFU device so that we can program it."""
global __dev
devices = get_dfu_devices(idVendor=__VID, idProduct=__PID)
if not devices:
raise ValueError('No DFU device found')
if len(devices) > 1:
raise ValueError("Multiple DFU devices found")
__dev = devices[0]
__dev.set_configuration()
# Claim DFU interface
usb.util.claim_interface(__dev, __DFU_INTERFACE)
# Clear status
clr_status()
def clr_status():
"""Clears any error status (perhaps left over from a previous session)."""
__dev.ctrl_transfer(0x21, __DFU_CLRSTATUS, 0, __DFU_INTERFACE,
None, __TIMEOUT)
def get_status():
"""Get the status of the last operation."""
stat = __dev.ctrl_transfer(0xA1, __DFU_GETSTATUS, 0, __DFU_INTERFACE,
6, 20000)
# print (__DFU_STAT[stat[4]], stat)
return stat[4]
def mass_erase():
"""Performs a MASS erase (i.e. erases the entire device."""
# Send DNLOAD with first byte=0x41
__dev.ctrl_transfer(0x21, __DFU_DNLOAD, 0, __DFU_INTERFACE,
"\x41", __TIMEOUT)
# Execute last command
if get_status() != __DFU_STATE_DFU_DOWNLOAD_BUSY:
raise Exception("DFU: erase failed")
# Check command state
if get_status() != __DFU_STATE_DFU_DOWNLOAD_IDLE:
raise Exception("DFU: erase failed")
def page_erase(addr):
"""Erases a single page."""
if __verbose:
print("Erasing page: 0x%x..." % (addr))
# Send DNLOAD with first byte=0x41 and page address
buf = struct.pack("<BI", 0x41, addr)
__dev.ctrl_transfer(0x21, __DFU_DNLOAD, 0, __DFU_INTERFACE, buf, __TIMEOUT)
# Execute last command
if get_status() != __DFU_STATE_DFU_DOWNLOAD_BUSY:
raise Exception("DFU: erase failed")
# Check command state
if get_status() != __DFU_STATE_DFU_DOWNLOAD_IDLE:
raise Exception("DFU: erase failed")
def set_address(addr):
"""Sets the address for the next operation."""
# Send DNLOAD with first byte=0x21 and page address
buf = struct.pack("<BI", 0x21, addr)
__dev.ctrl_transfer(0x21, __DFU_DNLOAD, 0, __DFU_INTERFACE, buf, __TIMEOUT)
# Execute last command
if get_status() != __DFU_STATE_DFU_DOWNLOAD_BUSY:
raise Exception("DFU: set address failed")
# Check command state
if get_status() != __DFU_STATE_DFU_DOWNLOAD_IDLE:
raise Exception("DFU: set address failed")
def write_memory(addr, buf, progress=None, progress_addr=0, progress_size=0):
"""Writes a buffer into memory. This routine assumes that memory has
already been erased.
"""
xfer_count = 0
xfer_bytes = 0
xfer_total = len(buf)
xfer_base = addr
while xfer_bytes < xfer_total:
if __verbose and xfer_count % 512 == 0:
print ("Addr 0x%x %dKBs/%dKBs..." % (xfer_base + xfer_bytes,
xfer_bytes // 1024,
xfer_total // 1024))
if progress and xfer_count % 2 == 0:
progress(progress_addr, xfer_base + xfer_bytes - progress_addr,
progress_size)
# Set mem write address
set_address(xfer_base+xfer_bytes)
# Send DNLOAD with fw data
# the "2048" is the DFU transfer size supported by the ST DFU bootloader
# TODO: this number should be extracted from the USB config descriptor
chunk = min(2048, xfer_total-xfer_bytes)
__dev.ctrl_transfer(0x21, __DFU_DNLOAD, 2, __DFU_INTERFACE,
buf[xfer_bytes:xfer_bytes + chunk], __TIMEOUT)
# Execute last command
if get_status() != __DFU_STATE_DFU_DOWNLOAD_BUSY:
raise Exception("DFU: write memory failed")
# Check command state
if get_status() != __DFU_STATE_DFU_DOWNLOAD_IDLE:
raise Exception("DFU: write memory failed")
xfer_count += 1
xfer_bytes += chunk
def write_page(buf, xfer_offset):
"""Writes a single page. This routine assumes that memory has already
been erased.
"""
xfer_base = 0x08000000
# Set mem write address
set_address(xfer_base+xfer_offset)
# Send DNLOAD with fw data
__dev.ctrl_transfer(0x21, __DFU_DNLOAD, 2, __DFU_INTERFACE, buf, __TIMEOUT)
# Execute last command
if get_status() != __DFU_STATE_DFU_DOWNLOAD_BUSY:
raise Exception("DFU: write memory failed")
# Check command state
if get_status() != __DFU_STATE_DFU_DOWNLOAD_IDLE:
raise Exception("DFU: write memory failed")
if __verbose:
print ("Write: 0x%x " % (xfer_base + xfer_offset))
def exit_dfu():
"""Exit DFU mode, and start running the program."""
# set jump address
set_address(0x08000000)
# Send DNLOAD with 0 length to exit DFU
__dev.ctrl_transfer(0x21, __DFU_DNLOAD, 0, __DFU_INTERFACE,
None, __TIMEOUT)
try:
# Execute last command
if get_status() != __DFU_STATE_DFU_MANIFEST:
print("Failed to reset device")
# Release device
usb.util.dispose_resources(__dev)
except:
pass
def named(values, names):
"""Creates a dict with `names` as fields, and `values` as values."""
return dict(zip(names.split(), values))
def consume(fmt, data, names):
"""Parses the struct defined by `fmt` from `data`, stores the parsed fields
into a named tuple using `names`. Returns the named tuple, and the data
with the struct stripped off."""
size = struct.calcsize(fmt)
return named(struct.unpack(fmt, data[:size]), names), data[size:]
def cstring(string):
"""Extracts a null-terminated string from a byte array."""
return string.decode('utf-8').split('\0', 1)[0]
def compute_crc(data):
"""Computes the CRC32 value for the data passed in."""
return 0xFFFFFFFF & -zlib.crc32(data) - 1
def read_dfu_file(filename):
"""Reads a DFU file, and parses the individual elements from the file.
Returns an array of elements. Each element is a dictionary with the
following keys:
num - The element index
address - The address that the element data should be written to.
size - The size of the element ddata.
data - The element data.
If an error occurs while parsing the file, then None is returned.
"""
print("File: {}".format(filename))
with open(filename, 'rb') as fin:
data = fin.read()
crc = compute_crc(data[:-4])
elements = []
# Decode the DFU Prefix
#
# <5sBIB
# < little endian
# 5s char[5] signature "DfuSe"
# B uint8_t version 1
# I uint32_t size Size of the DFU file (not including suffix)
# B uint8_t targets Number of targets
dfu_prefix, data = consume('<5sBIB', data,
'signature version size targets')
print (" %(signature)s v%(version)d, image size: %(size)d, "
"targets: %(targets)d" % dfu_prefix)
for target_idx in range(dfu_prefix['targets']):
# Decode the Image Prefix
#
# <6sBI255s2I
# < little endian
# 6s char[6] signature "Target"
# B uint8_t altsetting
# I uint32_t named bool indicating if a name was used
# 255s char[255] name name of the target
# I uint32_t size size of image (not incl prefix)
# I uint32_t elements Number of elements in the image
img_prefix, data = consume('<6sBI255s2I', data,
'signature altsetting named name '
'size elements')
img_prefix['num'] = target_idx
if img_prefix['named']:
img_prefix['name'] = cstring(img_prefix['name'])
else:
img_prefix['name'] = ''
print(' %(signature)s %(num)d, alt setting: %(altsetting)s, '
'name: "%(name)s", size: %(size)d, elements: %(elements)d'
% img_prefix)
target_size = img_prefix['size']
target_data, data = data[:target_size], data[target_size:]
for elem_idx in range(img_prefix['elements']):
# Decode target prefix
# < little endian
# I uint32_t element address
# I uint32_t element size
elem_prefix, target_data = consume('<2I', target_data, 'addr size')
elem_prefix['num'] = elem_idx
print(' %(num)d, address: 0x%(addr)08x, size: %(size)d'
% elem_prefix)
elem_size = elem_prefix['size']
elem_data = target_data[:elem_size]
target_data = target_data[elem_size:]
elem_prefix['data'] = elem_data
elements.append(elem_prefix)
if len(target_data):
print("target %d PARSE ERROR" % target_idx)
# Decode DFU Suffix
# < little endian
# H uint16_t device Firmware version
# H uint16_t product
# H uint16_t vendor
# H uint16_t dfu 0x11a (DFU file format version)
# 3s char[3] ufd 'UFD'
# B uint8_t len 16
# I uint32_t crc32
dfu_suffix = named(struct.unpack('<4H3sBI', data[:16]),
'device product vendor dfu ufd len crc')
print (' usb: %(vendor)04x:%(product)04x, device: 0x%(device)04x, '
'dfu: 0x%(dfu)04x, %(ufd)s, %(len)d, 0x%(crc)08x' % dfu_suffix)
if crc != dfu_suffix['crc']:
print("CRC ERROR: computed crc32 is 0x%08x" % crc)
return
data = data[16:]
if data:
print("PARSE ERROR")
return
return elements
class FilterDFU(object):
"""Class for filtering USB devices to identify devices which are in DFU
mode.
"""
def __call__(self, device):
for cfg in device:
for intf in cfg:
return (intf.bInterfaceClass == 0xFE and
intf.bInterfaceSubClass == 1)
def get_dfu_devices(*args, **kwargs):
"""Returns a list of USB device which are currently in DFU mode.
Additional filters (like idProduct and idVendor) can be passed in to
refine the search.
"""
# convert to list for compatibility with newer pyusb
return list(usb.core.find(*args, find_all=True,
custom_match=FilterDFU(), **kwargs))
def get_memory_layout(device):
"""Returns an array which identifies the memory layout. Each entry
of the array will contain a dictionary with the following keys:
addr - Address of this memory segment
last_addr - Last address contained within the memory segment.
size - size of the segment, in bytes
num_pages - number of pages in the segment
page_size - size of each page, in bytes
"""
cfg = device[0]
intf = cfg[(0, 0)]
mem_layout_str = get_string(device, intf.iInterface)
mem_layout = mem_layout_str.split('/')
result = []
for mem_layout_index in range(1, len(mem_layout), 2):
addr = int(mem_layout[mem_layout_index], 0)
segments = mem_layout[mem_layout_index + 1].split(',')
seg_re = re.compile(r'(\d+)\*(\d+)(.)(.)')
for segment in segments:
seg_match = seg_re.match(segment)
num_pages = int(seg_match.groups()[0], 10)
page_size = int(seg_match.groups()[1], 10)
multiplier = seg_match.groups()[2]
if multiplier == 'K':
page_size *= 1024
if multiplier == 'M':
page_size *= 1024 * 1024
size = num_pages * page_size
last_addr = addr + size - 1
result.append(named((addr, last_addr, size, num_pages, page_size),
"addr last_addr size num_pages page_size"))
addr += size
return result
def list_dfu_devices(*args, **kwargs):
"""Prints a lits of devices detected in DFU mode."""
devices = get_dfu_devices(*args, **kwargs)
if not devices:
print("No DFU capable devices found")
return
for device in devices:
print("Bus {} Device {:03d}: ID {:04x}:{:04x}"
.format(device.bus, device.address,
device.idVendor, device.idProduct))
layout = get_memory_layout(device)
print("Memory Layout")
for entry in layout:
print(" 0x{:x} {:2d} pages of {:3d}K bytes"
.format(entry['addr'], entry['num_pages'],
entry['page_size'] // 1024))
def write_elements(elements, mass_erase_used, progress=None):
"""Writes the indicated elements into the target memory,
erasing as needed.
"""
mem_layout = get_memory_layout(__dev)
for elem in elements:
addr = elem['addr']
size = elem['size']
data = elem['data']
elem_size = size
elem_addr = addr
if progress:
progress(elem_addr, 0, elem_size)
while size > 0:
write_size = size
if not mass_erase_used:
for segment in mem_layout:
if addr >= segment['addr'] and \
addr <= segment['last_addr']:
# We found the page containing the address we want to
# write, erase it
page_size = segment['page_size']
page_addr = addr & ~(page_size - 1)
if addr + write_size > page_addr + page_size:
write_size = page_addr + page_size - addr
page_erase(page_addr)
break
write_memory(addr, data[:write_size], progress,
elem_addr, elem_size)
data = data[write_size:]
addr += write_size
size -= write_size
if progress:
progress(elem_addr, addr - elem_addr, elem_size)
def cli_progress(addr, offset, size):
"""Prints a progress report suitable for use on the command line."""
width = 25
done = offset * width // size
print("\r0x{:08x} {:7d} [{}{}] {:3d}% "
.format(addr, size, '=' * done, ' ' * (width - done),
offset * 100 // size), end="")
sys.stdout.flush()
if offset == size:
print("")
def main():
"""Test program for verifying this files functionality."""
global __verbose
# Parse CMD args
parser = argparse.ArgumentParser(description='DFU Python Util')
#parser.add_argument("path", help="file path")
parser.add_argument(
"-l", "--list",
help="list available DFU devices",
action="store_true",
default=False
)
parser.add_argument(
"-m", "--mass-erase",
help="mass erase device",
action="store_true",
default=False
)
parser.add_argument(
"-u", "--upload",
help="read file from DFU device",
dest="path",
default=False
)
parser.add_argument(
"-v", "--verbose",
help="increase output verbosity",
action="store_true",
default=False
)
args = parser.parse_args()
__verbose = args.verbose
if args.list:
list_dfu_devices(idVendor=__VID, idProduct=__PID)
return
init()
if args.mass_erase:
print ("Mass erase...")
mass_erase()
if args.path:
elements = read_dfu_file(args.path)
if not elements:
return
print("Writing memory...")
write_elements(elements, args.mass_erase, progress=cli_progress)
print("Exiting DFU...")
exit_dfu()
return
print("No command specified")
if __name__ == '__main__':
main()

View File

@ -0,0 +1,49 @@
from pydfu import *
import urllib.request, tempfile, os, shutil, ssl
def firmware_update(verbose):
global __verbose
__verbose = verbose
temp_path = tempfile.mktemp("firmware.dfu")
url = "https://update.badge.emfcamp.org/firmware.dfu"
print("Hello - Welcome to the automated TiLDA Mk4 firmware updater")
print("Finding badge: ", end="")
try:
init()
print("DONE")
print("Downloading newest firmware: ", end="")
context = ssl._create_unverified_context()
with urllib.request.urlopen(url, context=context) as response:
with open(temp_path, 'wb') as tmp_file:
shutil.copyfileobj(response, tmp_file)
print("DONE")
elements = read_dfu_file(temp_path)
if not elements:
return
print("Resetting Badge: ", end="")
mass_erase()
print("DONE")
print("Updating...")
write_elements(elements, True, progress=cli_progress)
exit_dfu()
print("")
print("You can now restart your badge by pressing the reset button on the back. Please follow the instructions on the screen to finish the setup")
print("Have a nice day!")
except ValueError as e:
print("FAIL")
print("")
print("We couldn't find your badge. You need to make sure it's plugged in and in DFU mode.")
print("To put your badge into DFU mode you need to press the joystick in the middle while pressing the reset button at the back.")
print("After that, please try this script again.")
print()
print("Error: %s" %(e))
finally:
if os.path.isfile(temp_path): os.remove(temp_path)

View File

@ -30,8 +30,9 @@ This module has the following operations:
resources = get_resources(path) # Gets resources for a given path
add_hashes(path, resources) # Adds hashes to the file dict - not needed for testing
add_metadata(path, resources) # Adds metadata
resolve_dependencies(resources) # Merges all dependencies into each resource's file dict
validate(resources) # Runs basic validation
resolve_dependencies(resources) # Merges all dependencies into each resource's file dict
remove_upip(resources) # Remove upip resources from dict again
This module encapsulates all the main operations the app library is expect to
perform on a given checkout. It's intentionally kept in one file to make it easier
@ -72,14 +73,30 @@ def get_resources(path):
if sub_path.startswith(".") or sub_path == "__pycache__":
continue
full_path = os.path.join(path, sub_path)
if os.path.islink(full_path):
continue
if os.path.isfile(full_path):
result[sub_path] = {"type": "root", "files": {sub_path: None}}
continue
files = _scan_files(full_path, sub_path)
if sub_path in ["lib", "shared"]:
files = _scan_files(full_path, sub_path)
for rel_path in files:
result[rel_path] = {"type": sub_path, "files": {rel_path: None}}
elif sub_path == "upip":
for upip_lib in os.listdir(full_path):
if upip_lib.startswith(".") or upip_lib == "__pycache__":
continue
full_lib_path = os.path.join(full_path, upip_lib)
files = {}
if os.path.isfile(full_lib_path):
files = {full_lib_path: None}
upip_lib = upip_lib.rsplit('.', 1)[0]
else:
for rel_path in _scan_files(full_lib_path, os.path.join(sub_path, upip_lib)):
files[rel_path] = None
result["upip:%s" % upip_lib] = {"type": sub_path, "files": files}
else:
files = _scan_files(full_path, sub_path)
result[sub_path] = {"type": "app", "files": {}}
for rel_path in files:
result[sub_path]["files"][rel_path] = None
@ -131,16 +148,10 @@ def add_metadata(path, resources):
def _normalize_metadata(metadata):
metadata['description'] = metadata.pop('doc')
if 'dependencies' in metadata:
metadata['dependencies'] = [_normalize_lib(l) for l in metadata.pop('dependencies')]
metadata['dependencies'] = [normalize_dependency(l) for l in metadata.pop('dependencies')]
return metadata
def _normalize_lib(lib):
"""lib dependencies can be shortened to just their module name"""
if "." in lib or "/" in lib:
return lib
return "lib/%s.py" % lib
"""
resolve_dependencies(resources)
@ -195,6 +206,21 @@ def _validate_resource(path, resource):
if 'categories' not in resource or (not isinstance(resource['categories'], list)) or len(resource['categories']) == 0:
resource.setdefault("errors", []).append("___categories___ list is required in main.py but not found")
"""
remove_upip(resources)
upip adds over a 100 resources to the list. Some of them have broken validation as well, so it's
useful to remove them after resolving dependencies.
"""
def remove_upip(resources):
to_delete = []
for key, resource in resources.items():
if resource['type'] == "upip":
to_delete.append(key)
for key in to_delete:
del resources[key]
"""
helpers
"""
@ -209,3 +235,12 @@ def get_error_summary(resources):
summary += "\n"
return summary.strip()
def pretty_print_resources(resources):
import json
return json.dumps(resources, indent=4)
def normalize_dependency(dependency):
"""lib dependencies can be shortened to just their module name"""
if "." in dependency or "/" in dependency or "upip:" in dependency:
return dependency
return "lib/%s.py" % dependency

View File

@ -1,30 +1,46 @@
import os, glob, shutil, sys
import os, shutil, sys, fnmatch
def sync(storage, patterns):
def sync(storage, patterns, resources, verbose):
root = get_root()
# Add all paths that are already files
paths = [os.path.join(root, p) for p in (patterns or []) if os.path.isfile(os.path.join(root, p))]
paths = set([p for p in (patterns or []) if os.path.isfile(os.path.join(root, p))])
if patterns:
new_patterns = []
patterns = [os.path.join(root, p, "**") for p in patterns]
else:
patterns = ["**/**", "boot.py"]
# Always copy boot.py
paths.add("boot.py")
# wifi.json
wifi_path = os.path.join(root, "wifi.json")
if os.path.isfile(wifi_path):
paths.add(wifi_path)
if not patterns:
patterns = ["*"]
for pattern in patterns:
for path in glob.glob(pattern):
paths.append(path)
if len(paths) == 0:
print("No files to copy found for pattern %s" % patterns)
sys.exit(1)
found = False
for key, resource in resources.items():
if fnmatch.fnmatch(key, pattern):
found = True
if verbose:
print("Resource %s is going to be synced" % key)
for path in resource['files'].keys():
paths.add(path)
if not found:
print("WARN: No resources to copy found for pattern %s" % patterns)
if not verbose:
print("Copying %s files: " % len(paths), end="")
for path in paths:
rel_path = os.path.relpath(path, root)
if rel_path.startswith("."):
if not path:
continue
print("Copying %s..." % rel_path)
rel_path = os.path.relpath(path, root)
if rel_path.startswith(".") or os.path.isdir(path) or os.path.islink(path):
continue
if verbose:
print("Copying %s..." % rel_path)
else:
print(".", end="")
target = os.path.join(storage, rel_path)
target_dir = os.path.dirname(target)
@ -35,9 +51,10 @@ def sync(storage, patterns):
os.makedirs(target_dir)
shutil.copy2(path, target)
else:
if verbose:
print("Files copied successfully")
else:
print(" DONE")
def set_boot_app(storage, app_to_boot):
path = os.path.join(storage, 'once.txt')
@ -47,7 +64,8 @@ def set_boot_app(storage, app_to_boot):
pass
with open(path, 'w') as f:
f.write(app_to_boot + "\n")
print("setting next boot to %s" % app_to_boot)
if app_to_boot:
print("setting next boot to %s" % app_to_boot)
def get_root():
root = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..'))

View File

@ -16,7 +16,7 @@ $ tilda_tools sync
Update files in folder(s) to match current local version
$ tilda_tools sync my_game shared
$ tilda_tools sync <folder1> <folder2> ...
$ tilda_tools sync <pattern1> <pattern2> ...
Sync (as above), but execute my_app after reboot
$ tilda_toold.py sync --boot my_app [<other sync parameter>]
@ -36,6 +36,9 @@ $ tilda_tools test
Update firmware on badge (warning, this will delete all settings etc. stored on the badge!)
$ tilda_tools firmware-update
Setup wifi.json to be copied to the badge on every sync
$ tilda_tools wifi
Common parameters
-----------------
@ -45,16 +48,18 @@ Common parameters
"""
import sys, glob
import sync, pyboard_util
import sync, pyboard_util, wifi, pydfu_util
from resources import *
def main():
import argparse
cmd_parser = argparse.ArgumentParser(description='Toolchain for working with the TiLDA Mk4')
cmd_parser.add_argument('command', nargs=1, help='command [test|reset|sync|run]')
cmd_parser.add_argument('command', nargs=1, help='command [test|reset|sync|run|validate|wifi|firmware-update]', choices=['test', 'reset', 'sync', 'validate', 'run', 'wifi', 'firmware-update'])
cmd_parser.add_argument('-d', '--device', help='the serial device of the badge')
cmd_parser.add_argument('-s', '--storage', help='the usb mass storage path of the badge')
cmd_parser.add_argument('-b', '--baudrate', default=115200, help='the baud rate of the serial device')
cmd_parser.add_argument('-v', '--verbose', action='store_true', help='adds more output')
cmd_parser.add_argument('--print_resources', action='store_true', help='prints resources in json')
cmd_parser.add_argument('--boot', help='defines which app to boot into after reboot')
cmd_parser.add_argument('--run', help='like run, but after a sync')
cmd_parser.add_argument('-w', '--wait', default=0, type=int, help='seconds to wait for USB connected board to become available')
@ -63,11 +68,20 @@ def main():
command = args.command[0]
path = sync.get_root()
if command in ["test", "validate"]:
if command == "firmware-update":
pydfu_util.firmware_update(args.verbose)
if command == "wifi":
wifi.select_wifi()
if command in ["test", "validate", "sync"]:
resources = get_resources(path)
add_metadata(path, resources)
resolve_dependencies(resources)
validate(path, resources)
resolve_dependencies(resources)
remove_upip(resources)
if args.print_resources:
print(pretty_print_resources(resources))
errors = get_error_summary(resources)
if errors:
print("Problems found:\n")
@ -76,15 +90,20 @@ def main():
print("Local Test: PASS")
if command == "test":
command = "sync"
args.path = []
args.run = "test/main.py"
if len(args.paths) == 0:
args.run = "test/main.py"
else:
if "." not in args.paths[0]:
args.paths[0] = "lib/%s.py" % args.paths[0]
args.run = args.paths[0]
if command in ["reset", "sync"]:
pyboard_util.stop_badge(args)
pyboard_util.stop_badge(args, args.verbose)
if command == "sync":
paths = args.paths if len(args.paths) else None
sync.sync(get_storage(args), paths)
sync.sync(get_storage(args), paths, resources, args.verbose)
if command in ["reset", "sync"]:
sync.set_boot_app(get_storage(args), args.boot or "")
@ -94,6 +113,7 @@ def main():
args.paths = [args.run]
if command == "run":
pyboard_util.check_run(args)
pyboard_util.run(args)
@ -101,7 +121,7 @@ def main():
def find_storage():
# todo: find solution for windows and linux
for pattern in ['/Volumes/PYBFLASH']:
for pattern in ['/Volumes/PYBFLASH', '/Volumes/NO NAME']:
for path in glob.glob(pattern):
return path
print("Couldn't find badge storage - Please make it's plugged in and reset it if necessary")

14
.development/update_upip.sh Executable file
View File

@ -0,0 +1,14 @@
#!/usr/bin/env bash
TARGET=$(dirname `pwd`)"/upip"
rm -rf "/tmp/upip.zip"
curl -L "https://github.com/micropython/micropython-lib/archive/master.zip" -o "/tmp/upip.zip"
rm -rf "/tmp/upip"
unzip -q -a "/tmp/upip.zip" -d "/tmp/upip"
cd "/tmp/upip/micropython-lib-master"
rm -rf "$TARGET/*"
for d in `find . -maxdepth 1 -type d ! -name ".*"`; do
echo $d;
find "$d" -maxdepth 1 -mindepth 1 \( -name '*.py' -not -name 'test_*' -not -name 'example_*' -not -name 'setup.py' -size +10c \) -or \( -type d -not -name 'dist' -not -name '*.egg-info' -not -name '__pycache__' \) | xargs -I{} bash -c -- 'ditto {} "'"$TARGET"'/"`echo "{}" | sed -e "s/\.\/[^\/]*\///"`';
done

13
.development/wifi.py Normal file
View File

@ -0,0 +1,13 @@
import os, sync, json
def select_wifi():
ssid = input('Enter wifi name (SSID): ')
pw = input('Enter wifi password, leave empty for open network: ')
with open(os.path.join(sync.get_root(), "wifi.json"), "wt") as file:
if pw:
conn_details = {"ssid": ssid, "pw": pw}
else:
conn_details = {"ssid": ssid}
file.write(json.dumps(conn_details))
print("wifi.json created - It will be transfered to the badge on the next sync")

1
.gitignore vendored
View File

@ -1,2 +1,3 @@
.DS_Store
__pycache__
wifi.json

View File

@ -1,7 +1,9 @@
import pyb, os, micropython
import pyb, os, micropython, sys
micropython.alloc_emergency_exception_buf(100)
sys.path.append('/flash/upip')
os.sync()
root = os.listdir()

View File

@ -1,3 +1,262 @@
"""HTTP library specially tied to TiLDAs functionality"""
"""HTTP library specially tied to TiLDAs functionality
Somewhat inspired by "request".
Current known issues:
* HTTPS is not supported
*
"""
___license___ = "MIT"
___dependencies___ = ["urlencode"]
import usocket, ujson, os, time, gc, wifi
from urlencode import urlencode
"""Usage
from http_client import *
print(get("http://example.com").raise_for_status().content)
post("http://mydomain.co.uk/api/post", data="SOMETHING").raise_for_status().close() # If response is not consumed you need to close manually
# Or, if you prefer the with syntax:
with post("http://mydomain.co.uk/api/post", urlencoded="SOMETHING") as response:
response.raise_for_error() # No manual close needed
"""
SUPPORT_TIMEOUT = hasattr(usocket.socket, 'settimeout')
CONTENT_TYPE_JSON = 'application/json'
BUFFER_SIZE = 1024
class Response(object):
def __init__(self):
self.encoding = 'utf-8'
self.headers = {}
self.status = None
self.socket = None
self._content = None
# Hands the responsibility for a socket over to this reponse. This needs to happen
# before any content can be inspected
def add_socket(self, socket, content_so_far):
self.content_so_far = content_so_far
self.socket = socket
@property
def content(self, timeout=90):
start_time = time.time()
if not self._content:
if not self.socket:
raise OSError("Invalid response socket state. Has the content been downloaded instead?")
try:
if "Content-Length" in self.headers:
content_length = int(self.headers["Content-Length"])
elif "content-length" in self.headers:
content_length = int(self.headers["content-length"])
else:
raise Exception("No Content-Length")
self._content = self.content_so_far
del self.content_so_far
while len(self._content) < content_length:
buf = self.socket.recv(BUFFER_SIZE)
self._content += buf
if (time.time() - start_time) > timeout:
raise Exception("HTTP request timeout")
finally:
self.close()
return self._content;
@property
def text(self):
return str(self.content, self.encoding) if self.content else ''
# If you don't use the content of a Response at all you need to manually close it
def close(self):
if self.socket is not None:
self.socket.close()
self.socket = None
def json(self):
return ujson.loads(self.text)
# Writes content into a file. This function will write while receiving, which avoids
# having to load all content into memory
def download_to(self, target, timeout=90):
start_time = time.time()
if not self.socket:
raise OSError("Invalid response socket state. Has the content already been consumed?")
try:
if "Content-Length" in self.headers:
remaining = int(self.headers["Content-Length"])
elif "content-length" in self.headers:
remaining = int(self.headers["content-length"])
else:
raise Exception("No Content-Length")
with open(target, 'wb') as f:
f.write(self.content_so_far)
remaining -= len(self.content_so_far)
del self.content_so_far
while remaining > 0:
buf = self.socket.recv(BUFFER_SIZE)
f.write(buf)
remaining -= len(buf)
if (time.time() - start_time) > timeout:
raise Exception("HTTP request timeout")
f.flush()
os.sync()
finally:
self.close()
def raise_for_status(self):
if 400 <= self.status < 500:
raise OSError('Client error: %s' % self.status)
if 500 <= self.status < 600:
raise OSError('Server error: %s' % self.status)
return self
# In case you want to use "with"
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, traceback):
self.close()
def open_http_socket(method, url, json=None, timeout=None, headers=None, data=None, params=None):
# This will immediately return if we're already connected, otherwise
# it'll attempt to connect or prompt for a new network. Proceeding
# without an active network connection will cause the getaddrinfo to
# fail.
wifi.connect(
wait=True,
show_wait_message=False,
prompt_on_fail=True,
dialog_title='TiLDA Wifi'
)
urlparts = url.split('/', 3)
proto = urlparts[0]
host = urlparts[2]
urlpath = '' if len(urlparts) < 4 else urlparts[3]
if proto == 'http:':
port = 80
elif proto == 'https:':
raise OSError("HTTPS is currently not supported")
port = 443
else:
raise OSError('Unsupported protocol: %s' % proto[:-1])
if ':' in host:
host, port = host.split(':')
port = int(port)
if data is not None:
if isinstance(data, str):
content = data
content_type = "text/plain; charset=UTF-8"
else:
content = urlencode(data)
content_type = "application/x-www-form-urlencoded"
elif json is not None:
content = ujson.dumps(json)
content_type = CONTENT_TYPE_JSON
else:
content = None
# ToDo: Handle IPv6 addresses
if is_ipv4_address(host):
addr = (host, port)
else:
ai = usocket.getaddrinfo(host, port)
addr = ai[0][4]
sock = None
if proto == 'https:':
sock = usocket.socket(usocket.AF_INET, usocket.SOCK_STREAM, usocket.SEC_SOCKET)
else:
sock = usocket.socket()
if params:
urlpath += "?" + urlencode(params)
sock.connect(addr)
if proto == 'https:':
sock.settimeout(0) # Actually make timeouts working properly with ssl
sock.send('%s /%s HTTP/1.0\r\nHost: %s\r\n' % (method, urlpath, host))
if headers is not None:
for header in headers.items():
sock.send('%s: %s\r\n' % header)
if content is not None:
sock.send('content-length: %s\r\n' % len(content))
sock.send('content-type: %s\r\n' % content_type)
sock.send('\r\n')
sock.send(content)
else:
sock.send('\r\n')
return sock
# Adapted from upip
def request(method, url, json=None, timeout=None, headers=None, data=None, params=None):
sock = open_http_socket(method, url, json, timeout, headers, data, params)
try:
response = Response()
state = 1
hbuf = b""
while True:
buf = sock.recv(BUFFER_SIZE)
if state == 1: # Status
nl = buf.find(b"\n")
if nl > -1:
hbuf += buf[:nl - 1]
response.status = int(hbuf.split(b' ')[1])
state = 2
hbuf = b"";
buf = buf[nl + 1:]
else:
hbuf += buf
if state == 2: # Headers
hbuf += buf
nl = hbuf.find(b"\n")
while nl > -1:
if nl < 2:
buf = hbuf[2:]
hbuf = None
state = 3
break
header = hbuf[:nl - 1].decode("utf8").split(':', 3)
response.headers[header[0].strip()] = header[1].strip()
hbuf = hbuf[nl + 1:]
nl = hbuf.find(b"\n")
if state == 3: # Content
response.add_socket(sock, buf)
sock = None # It's not our responsibility to close the socket anymore
return response
finally:
if sock: sock.close()
gc.collect()
def get(url, **kwargs):
return request('GET', url, **kwargs)
def post(url, **kwargs):
return request('POST', url, **kwargs)
def is_ipv4_address(address):
octets = address.split('.')
try:
valid_octets = [x for x in octets if 0 <= int(x) and int(x) <= 255]
return len(valid_octets) == 4
except Exception:
return False

View File

@ -1,11 +1,4 @@
"""This app's purpose is to run a series of tests against library code
Once successful it displays and prints 'ok' on the screen.
Please make sure that all tests pass before sending a PR. You can easily
do this by running "tilda_tools test". Thank you for keeping all the
tests green! *face-throwing-a-kiss-emoji*
"""
"""Tests for database"""
___license___ = "MIT"
___dependencies___ = ["unittest", "database"]

View File

@ -1,21 +1,47 @@
"""This app's purpose is to run a series of tests against library code
Once successful it displays and prints 'ok' on the screen.
Please make sure that all tests pass before sending a PR. You can easily
do this by running "tilda_tools test". Thank you for keeping all the
tests green! *face-throwing-a-kiss-emoji*
"""
"""Tests for http"""
___license___ = "MIT"
___dependencies___ = ["unittest"]
___dependencies___ = ["unittest", "http", "wifi"]
import unittest
from http import *
import wifi
class TestHttp(unittest.TestCase):
def test_foo(self):
pass
def setUpClass(self):
wifi.connect()
def test_get_with_https(self):
with self.assertRaises(OSError) as context:
get("https://httpbin.org/get")
self.assertIn("HTTPS is currently not supported", str(context.exception))
def test_get(self):
with get("http://httpbin.org/get", params={"foo": "bar"}, headers={"accept": "application/json"}) as response:
self.assertEqual(response.headers["Content-Type"], "application/json")
self.assertEqual(response.status, 200)
content = response.json()
self.assertEqual(content["headers"]["Accept"], "application/json")
self.assertEqual(content["args"], {"foo":"bar"})
def test_post_form(self):
with post("http://httpbin.org/post", data={"foo": "bar"}).raise_for_status() as response:
content = response.json()
self.assertEqual(content["headers"]["Content-Type"], "application/x-www-form-urlencoded")
self.assertEqual(content["form"], {"foo":"bar"})
def test_post_string(self):
with post("http://httpbin.org/post", data="foobar").raise_for_status() as response:
content = response.json()
self.assertEqual(content["headers"]["Content-Type"], "text/plain; charset=UTF-8")
self.assertEqual(content["data"], "foobar")
def test_post_json(self):
with post("http://httpbin.org/post", json={"foo":"bar"}).raise_for_status() as response:
content = response.json()
self.assertEqual(content["headers"]["Content-Type"], "application/json")
self.assertEqual(content["json"], {"foo":"bar"})
if __name__ == "__main__":

19
lib/test_urlencode.py Normal file
View File

@ -0,0 +1,19 @@
"""Tests for urlencode"""
___license___ = "MIT"
___dependencies___ = ["unittest", "urlencode"]
import unittest
from urlencode import *
class TestUrlencode(unittest.TestCase):
def test_urlencode(self):
self.assertEqual(
urlencode({"täst":"!£$%(*&^%()", "l": "😃"}),
"l=%F0%9F%98%83&t%C3%A4st=%21%C2%A3%24%25%28%2A%26%5E%25%28%29"
)
if __name__ == "__main__":
TestUrlencode().run_standalone()

View File

@ -1,8 +1,12 @@
"""Base libarary for test cases"""
"""Base libarary for test cases
See https://github.com/python/cpython/blob/master/Lib/unittest/case.py for
some of the code copied here
"""
___license___ = "MIT"
import sys
import sys, ugfx
class SkipTest(Exception):
"""Indicates a test has been skipped"""
@ -44,6 +48,7 @@ class TestCase(object):
return self.count_fail == 0
def run_standalone(self):
ugfx.clear(0xFFFFFF)
self.run()
print_result(self.count_pass, self.count_fail, self.count_skip)
@ -72,8 +77,116 @@ class TestCase(object):
def assertFalse(self, actual):
self.assertEqual(actual, False)
def assertRaises(self, expected_exception, *args, **kwargs):
context = _AssertRaisesContext(expected_exception, self)
return context.handle('assertRaises', args, kwargs)
def assertIn(self, sub, actual):
if not sub in actual:
raise FailTest("Expected %s to be in %s" % (sub, actual))
def skip(self):
raise SkipTest()
def print_result(count_pass, count_fail, count_skip):
print("PASS: %s FAIL: %s SKIP: %s" % (count_pass, count_fail, count_skip))
###########################################
#### Bits copied straight from cpython ####
###########################################
class _BaseTestCaseContext:
def __init__(self, test_case):
self.test_case = test_case
def _raiseFailure(self, standardMsg):
msg = self.test_case._formatMessage(self.msg, standardMsg)
raise self.test_case.failureException(msg)
class _AssertRaisesBaseContext(_BaseTestCaseContext):
def __init__(self, expected, test_case, expected_regex=None):
_BaseTestCaseContext.__init__(self, test_case)
self.expected = expected
self.test_case = test_case
if expected_regex is not None:
expected_regex = re.compile(expected_regex)
self.expected_regex = expected_regex
self.obj_name = None
self.msg = None
def handle(self, name, args, kwargs):
"""
If args is empty, assertRaises/Warns is being used as a
context manager, so check for a 'msg' kwarg and return self.
If args is not empty, call a callable passing positional and keyword
arguments.
"""
try:
if not _is_subtype(self.expected, self._base_type):
raise TypeError('%s() arg 1 must be %s' %
(name, self._base_type_str))
if args and args[0] is None:
warnings.warn("callable is None",
DeprecationWarning, 3)
args = ()
if not args:
self.msg = kwargs.pop('msg', None)
if kwargs:
warnings.warn('%r is an invalid keyword argument for '
'this function' % next(iter(kwargs)),
DeprecationWarning, 3)
return self
callable_obj, *args = args
try:
self.obj_name = callable_obj.__name__
except AttributeError:
self.obj_name = str(callable_obj)
with self:
callable_obj(*args, **kwargs)
finally:
# bpo-23890: manually break a reference cycle
self = None
class _AssertRaisesContext(_AssertRaisesBaseContext):
"""A context manager used to implement TestCase.assertRaises* methods."""
_base_type = BaseException
_base_type_str = 'an exception type or tuple of exception types'
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, tb):
if exc_type is None:
try:
exc_name = self.expected.__name__
except AttributeError:
exc_name = str(self.expected)
if self.obj_name:
self._raiseFailure("{} not raised by {}".format(exc_name,
self.obj_name))
else:
self._raiseFailure("{} not raised".format(exc_name))
if not issubclass(exc_type, self.expected):
# let unexpected exceptions pass through
return False
# store exception
self.exception = exc_value
if self.expected_regex is None:
return True
expected_regex = self.expected_regex
if not expected_regex.search(str(exc_value)):
self._raiseFailure('"{}" does not match "{}"'.format(
expected_regex.pattern, str(exc_value)))
return True
def _is_subtype(expected, basetype):
if isinstance(expected, tuple):
return all(_is_subtype(e, basetype) for e in expected)
return isinstance(expected, type) and issubclass(expected, basetype)

133
lib/urlencode.py Normal file
View File

@ -0,0 +1,133 @@
"""URL encoding helper
Mostly taken from urllib.parse (which is sadly too large to be imported directly)
I've removed most of the comment to make it easier on micropython
"""
___license___ = "Python"
___dependencies___ = ["upip:collections"]
from collections.defaultdict import defaultdict
_ALWAYS_SAFE = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
b'abcdefghijklmnopqrstuvwxyz'
b'0123456789'
b'_.-')
_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE)
_safe_quoters = {}
class Quoter(defaultdict):
def __init__(self, safe):
"""safe: bytes object."""
self.safe = _ALWAYS_SAFE.union(safe)
def __repr__(self):
# Without this, will just display as a defaultdict
return "<Quoter %r>" % dict(self)
def __missing__(self, b):
# Handle a cache miss. Store quoted string in cache and return.
res = chr(b) if b in self.safe else '%{:02X}'.format(b)
self[b] = res
return res
def quote(string, safe='/', encoding=None, errors=None):
if isinstance(string, str):
if not string:
return string
if encoding is None:
encoding = 'utf-8'
if errors is None:
errors = 'strict'
string = string.encode(encoding, errors)
else:
if encoding is not None:
raise TypeError("quote() doesn't support 'encoding' for bytes")
if errors is not None:
raise TypeError("quote() doesn't support 'errors' for bytes")
return quote_from_bytes(string, safe)
def quote_plus(string, safe='', encoding=None, errors=None):
if ((isinstance(string, str) and ' ' not in string) or
(isinstance(string, bytes) and b' ' not in string)):
return quote(string, safe, encoding, errors)
if isinstance(safe, str):
space = ' '
else:
space = b' '
string = quote(string, safe + space, encoding, errors)
return string.replace(' ', '+')
def quote_from_bytes(bs, safe='/'):
if not isinstance(bs, (bytes, bytearray)):
raise TypeError("quote_from_bytes() expected bytes")
if not bs:
return ''
if isinstance(safe, str):
safe = safe.encode('ascii', 'ignore')
else:
safe = bytes([c for c in safe if c < 128])
if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
return bs.decode()
try:
quoter = _safe_quoters[safe]
except KeyError:
_safe_quoters[safe] = quoter = Quoter(safe).__getitem__
return ''.join([quoter(char) for char in bs])
def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
if hasattr(query, "items"):
query = query.items()
else:
try:
if len(query) and not isinstance(query[0], tuple):
raise TypeError
except TypeError:
raise TypeError("not a valid non-string sequence "
"or mapping object")#.with_traceback(tb)
l = []
if not doseq:
for k, v in query:
if isinstance(k, bytes):
k = quote_plus(k, safe)
else:
k = quote_plus(str(k), safe, encoding, errors)
if isinstance(v, bytes):
v = quote_plus(v, safe)
else:
v = quote_plus(str(v), safe, encoding, errors)
l.append(k + '=' + v)
else:
for k, v in query:
if isinstance(k, bytes):
k = quote_plus(k, safe)
else:
k = quote_plus(str(k), safe, encoding, errors)
if isinstance(v, bytes):
v = quote_plus(v, safe)
l.append(k + '=' + v)
elif isinstance(v, str):
v = quote_plus(v, safe, encoding, errors)
l.append(k + '=' + v)
else:
try:
# Is this a sufficient test for sequence-ness?
x = len(v)
except TypeError:
# not a sequence
v = quote_plus(str(v), safe, encoding, errors)
l.append(k + '=' + v)
else:
# loop over the sequence
for elt in v:
if isinstance(elt, bytes):
elt = quote_plus(elt, safe)
else:
elt = quote_plus(str(elt), safe, encoding, errors)
l.append(k + '=' + elt)
return '&'.join(l)

View File

@ -10,11 +10,12 @@ tests green! *face-throwing-a-kiss-emoji*
___license___ = "MIT"
___categories___ = ["Development"]
___name___ = "Integration test app"
___dependencies___ = ["unittest", "test_database", "test_http"]
___dependencies___ = ["unittest", "test_database", "test_http", "test_urlencode"]
# Add all tests that need to be run here:
import test_database
import test_http
import test_urlencode
# run
import sys, unittest

7
upip/__future__.py Normal file
View File

@ -0,0 +1,7 @@
nested_scopes = True
generators = True
division = True
absolute_import = True
with_statement = True
print_function = True
unicode_literals = True

34
upip/_libc.py Normal file
View File

@ -0,0 +1,34 @@
import ffi
import sys
_h = None
names = ('libc.so', 'libc.so.0', 'libc.so.6', 'libc.dylib')
def get():
global _h
if _h:
return _h
err = None
for n in names:
try:
_h = ffi.open(n)
return _h
except OSError as e:
err = e
raise err
def set_names(n):
global names
names = n
# Find out bitness of the platform, even if long ints are not supported
# TODO: All bitness differences should be removed from micropython-lib, and
# this snippet too.
bitness = 1
v = sys.maxsize
while v:
bitness += 1
v >>= 1

395
upip/_markupbase.py Normal file
View File

@ -0,0 +1,395 @@
"""Shared support for scanning document type declarations in HTML and XHTML.
This module is used as a foundation for the html.parser module. It has no
documented public API and should not be used directly.
"""
import re
_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match
_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match
_commentclose = re.compile(r'--\s*>')
_markedsectionclose = re.compile(r']\s*]\s*>')
# An analysis of the MS-Word extensions is available at
# http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf
_msmarkedsectionclose = re.compile(r']\s*>')
del re
class ParserBase:
"""Parser base class which provides some common support methods used
by the SGML/HTML and XHTML parsers."""
def __init__(self):
if self.__class__ is ParserBase:
raise RuntimeError(
"_markupbase.ParserBase must be subclassed")
def error(self, message):
raise NotImplementedError(
"subclasses of ParserBase must override error()")
def reset(self):
self.lineno = 1
self.offset = 0
def getpos(self):
"""Return current line number and offset."""
return self.lineno, self.offset
# Internal -- update line number and offset. This should be
# called for each piece of data exactly once, in order -- in other
# words the concatenation of all the input strings to this
# function should be exactly the entire input.
def updatepos(self, i, j):
if i >= j:
return j
rawdata = self.rawdata
nlines = rawdata.count("\n", i, j)
if nlines:
self.lineno = self.lineno + nlines
pos = rawdata.rindex("\n", i, j) # Should not fail
self.offset = j-(pos+1)
else:
self.offset = self.offset + j-i
return j
_decl_otherchars = ''
# Internal -- parse declaration (for use by subclasses).
def parse_declaration(self, i):
# This is some sort of declaration; in "HTML as
# deployed," this should only be the document type
# declaration ("<!DOCTYPE html...>").
# ISO 8879:1986, however, has more complex
# declaration syntax for elements in <!...>, including:
# --comment--
# [marked section]
# name in the following list: ENTITY, DOCTYPE, ELEMENT,
# ATTLIST, NOTATION, SHORTREF, USEMAP,
# LINKTYPE, LINK, IDLINK, USELINK, SYSTEM
rawdata = self.rawdata
j = i + 2
assert rawdata[i:j] == "<!", "unexpected call to parse_declaration"
if rawdata[j:j+1] == ">":
# the empty comment <!>
return j + 1
if rawdata[j:j+1] in ("-", ""):
# Start of comment followed by buffer boundary,
# or just a buffer boundary.
return -1
# A simple, practical version could look like: ((name|stringlit) S*) + '>'
n = len(rawdata)
if rawdata[j:j+2] == '--': #comment
# Locate --.*-- as the body of the comment
return self.parse_comment(i)
elif rawdata[j] == '[': #marked section
# Locate [statusWord [...arbitrary SGML...]] as the body of the marked section
# Where statusWord is one of TEMP, CDATA, IGNORE, INCLUDE, RCDATA
# Note that this is extended by Microsoft Office "Save as Web" function
# to include [if...] and [endif].
return self.parse_marked_section(i)
else: #all other declaration elements
decltype, j = self._scan_name(j, i)
if j < 0:
return j
if decltype == "doctype":
self._decl_otherchars = ''
while j < n:
c = rawdata[j]
if c == ">":
# end of declaration syntax
data = rawdata[i+2:j]
if decltype == "doctype":
self.handle_decl(data)
else:
# According to the HTML5 specs sections "8.2.4.44 Bogus
# comment state" and "8.2.4.45 Markup declaration open
# state", a comment token should be emitted.
# Calling unknown_decl provides more flexibility though.
self.unknown_decl(data)
return j + 1
if c in "\"'":
m = _declstringlit_match(rawdata, j)
if not m:
return -1 # incomplete
j = m.end()
elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
name, j = self._scan_name(j, i)
elif c in self._decl_otherchars:
j = j + 1
elif c == "[":
# this could be handled in a separate doctype parser
if decltype == "doctype":
j = self._parse_doctype_subset(j + 1, i)
elif decltype in {"attlist", "linktype", "link", "element"}:
# must tolerate []'d groups in a content model in an element declaration
# also in data attribute specifications of attlist declaration
# also link type declaration subsets in linktype declarations
# also link attribute specification lists in link declarations
self.error("unsupported '[' char in %s declaration" % decltype)
else:
self.error("unexpected '[' char in declaration")
else:
self.error(
"unexpected %r char in declaration" % rawdata[j])
if j < 0:
return j
return -1 # incomplete
# Internal -- parse a marked section
# Override this to handle MS-word extension syntax <![if word]>content<![endif]>
def parse_marked_section(self, i, report=1):
rawdata= self.rawdata
assert rawdata[i:i+3] == '<![', "unexpected call to parse_marked_section()"
sectName, j = self._scan_name( i+3, i )
if j < 0:
return j
if sectName in {"temp", "cdata", "ignore", "include", "rcdata"}:
# look for standard ]]> ending
match= _markedsectionclose.search(rawdata, i+3)
elif sectName in {"if", "else", "endif"}:
# look for MS Office ]> ending
match= _msmarkedsectionclose.search(rawdata, i+3)
else:
self.error('unknown status keyword %r in marked section' % rawdata[i+3:j])
if not match:
return -1
if report:
j = match.start(0)
self.unknown_decl(rawdata[i+3: j])
return match.end(0)
# Internal -- parse comment, return length or -1 if not terminated
def parse_comment(self, i, report=1):
rawdata = self.rawdata
if rawdata[i:i+4] != '<!--':
self.error('unexpected call to parse_comment()')
match = _commentclose.search(rawdata, i+4)
if not match:
return -1
if report:
j = match.start(0)
self.handle_comment(rawdata[i+4: j])
return match.end(0)
# Internal -- scan past the internal subset in a <!DOCTYPE declaration,
# returning the index just past any whitespace following the trailing ']'.
def _parse_doctype_subset(self, i, declstartpos):
rawdata = self.rawdata
n = len(rawdata)
j = i
while j < n:
c = rawdata[j]
if c == "<":
s = rawdata[j:j+2]
if s == "<":
# end of buffer; incomplete
return -1
if s != "<!":
self.updatepos(declstartpos, j + 1)
self.error("unexpected char in internal subset (in %r)" % s)
if (j + 2) == n:
# end of buffer; incomplete
return -1
if (j + 4) > n:
# end of buffer; incomplete
return -1
if rawdata[j:j+4] == "<!--":
j = self.parse_comment(j, report=0)
if j < 0:
return j
continue
name, j = self._scan_name(j + 2, declstartpos)
if j == -1:
return -1
if name not in {"attlist", "element", "entity", "notation"}:
self.updatepos(declstartpos, j + 2)
self.error(
"unknown declaration %r in internal subset" % name)
# handle the individual names
meth = getattr(self, "_parse_doctype_" + name)
j = meth(j, declstartpos)
if j < 0:
return j
elif c == "%":
# parameter entity reference
if (j + 1) == n:
# end of buffer; incomplete
return -1
s, j = self._scan_name(j + 1, declstartpos)
if j < 0:
return j
if rawdata[j] == ";":
j = j + 1
elif c == "]":
j = j + 1
while j < n and rawdata[j].isspace():
j = j + 1
if j < n:
if rawdata[j] == ">":
return j
self.updatepos(declstartpos, j)
self.error("unexpected char after internal subset")
else:
return -1
elif c.isspace():
j = j + 1
else:
self.updatepos(declstartpos, j)
self.error("unexpected char %r in internal subset" % c)
# end of buffer reached
return -1
# Internal -- scan past <!ELEMENT declarations
def _parse_doctype_element(self, i, declstartpos):
name, j = self._scan_name(i, declstartpos)
if j == -1:
return -1
# style content model; just skip until '>'
rawdata = self.rawdata
if '>' in rawdata[j:]:
return rawdata.find(">", j) + 1
return -1
# Internal -- scan past <!ATTLIST declarations
def _parse_doctype_attlist(self, i, declstartpos):
rawdata = self.rawdata
name, j = self._scan_name(i, declstartpos)
c = rawdata[j:j+1]
if c == "":
return -1
if c == ">":
return j + 1
while 1:
# scan a series of attribute descriptions; simplified:
# name type [value] [#constraint]
name, j = self._scan_name(j, declstartpos)
if j < 0:
return j
c = rawdata[j:j+1]
if c == "":
return -1
if c == "(":
# an enumerated type; look for ')'
if ")" in rawdata[j:]:
j = rawdata.find(")", j) + 1
else:
return -1
while rawdata[j:j+1].isspace():
j = j + 1
if not rawdata[j:]:
# end of buffer, incomplete
return -1
else:
name, j = self._scan_name(j, declstartpos)
c = rawdata[j:j+1]
if not c:
return -1
if c in "'\"":
m = _declstringlit_match(rawdata, j)
if m:
j = m.end()
else:
return -1
c = rawdata[j:j+1]
if not c:
return -1
if c == "#":
if rawdata[j:] == "#":
# end of buffer
return -1
name, j = self._scan_name(j + 1, declstartpos)
if j < 0:
return j
c = rawdata[j:j+1]
if not c:
return -1
if c == '>':
# all done
return j + 1
# Internal -- scan past <!NOTATION declarations
def _parse_doctype_notation(self, i, declstartpos):
name, j = self._scan_name(i, declstartpos)
if j < 0:
return j
rawdata = self.rawdata
while 1:
c = rawdata[j:j+1]
if not c:
# end of buffer; incomplete
return -1
if c == '>':
return j + 1
if c in "'\"":
m = _declstringlit_match(rawdata, j)
if not m:
return -1
j = m.end()
else:
name, j = self._scan_name(j, declstartpos)
if j < 0:
return j
# Internal -- scan past <!ENTITY declarations
def _parse_doctype_entity(self, i, declstartpos):
rawdata = self.rawdata
if rawdata[i:i+1] == "%":
j = i + 1
while 1:
c = rawdata[j:j+1]
if not c:
return -1
if c.isspace():
j = j + 1
else:
break
else:
j = i
name, j = self._scan_name(j, declstartpos)
if j < 0:
return j
while 1:
c = self.rawdata[j:j+1]
if not c:
return -1
if c in "'\"":
m = _declstringlit_match(rawdata, j)
if m:
j = m.end()
else:
return -1 # incomplete
elif c == ">":
return j + 1
else:
name, j = self._scan_name(j, declstartpos)
if j < 0:
return j
# Internal -- scan a name token and the new position and the token, or
# return -1 if we've reached the end of the buffer.
def _scan_name(self, i, declstartpos):
rawdata = self.rawdata
n = len(rawdata)
if i == n:
return None, -1
m = _declname_match(rawdata, i)
if m:
s = m.group()
name = s.strip()
if (i + len(s)) == n:
return None, -1 # end of buffer
return name.lower(), m.end()
else:
self.updatepos(declstartpos, i)
self.error("expected name token at %r"
% rawdata[declstartpos:declstartpos+20])
# To be overridden -- handlers for unknown objects
def unknown_decl(self, data):
pass

2
upip/abc.py Normal file
View File

@ -0,0 +1,2 @@
def abstractmethod(f):
return f

216
upip/argparse.py Normal file
View File

@ -0,0 +1,216 @@
"""
Minimal and functional version of CPython's argparse module.
"""
import sys
from ucollections import namedtuple
class _ArgError(BaseException):
pass
class _Arg:
def __init__(self, names, dest, action, nargs, const, default, help):
self.names = names
self.dest = dest
self.action = action
self.nargs = nargs
self.const = const
self.default = default
self.help = help
def parse(self, optname, args):
# parse args for this arg
if self.action == "store":
if self.nargs is None:
if args:
return args.pop(0)
else:
raise _ArgError("expecting value for %s" % optname)
elif self.nargs == "?":
if args:
return args.pop(0)
else:
return self.default
else:
if self.nargs == "*":
n = -1
elif self.nargs == "+":
if not args:
raise _ArgError("expecting value for %s" % optname)
n = -1
else:
n = int(self.nargs)
ret = []
stop_at_opt = True
while args and n != 0:
if stop_at_opt and args[0].startswith("-") and args[0] != "-":
if args[0] == "--":
stop_at_opt = False
args.pop(0)
else:
break
else:
ret.append(args.pop(0))
n -= 1
if n > 0:
raise _ArgError("expecting value for %s" % optname)
return ret
elif self.action == "store_const":
return self.const
else:
assert False
def _dest_from_optnames(opt_names):
dest = opt_names[0]
for name in opt_names:
if name.startswith("--"):
dest = name
break
return dest.lstrip("-").replace("-", "_")
class ArgumentParser:
def __init__(self, *, description=""):
self.description = description
self.opt = []
self.pos = []
def add_argument(self, *args, **kwargs):
action = kwargs.get("action", "store")
if action == "store_true":
action = "store_const"
const = True
default = kwargs.get("default", False)
elif action == "store_false":
action = "store_const"
const = False
default = kwargs.get("default", True)
else:
const = kwargs.get("const", None)
default = kwargs.get("default", None)
if args and args[0].startswith("-"):
list = self.opt
dest = kwargs.get("dest")
if dest is None:
dest = _dest_from_optnames(args)
else:
list = self.pos
dest = kwargs.get("dest")
if dest is None:
dest = args[0]
if not args:
args = [dest]
list.append(
_Arg(args, dest, action, kwargs.get("nargs", None),
const, default, kwargs.get("help", "")))
def usage(self, full):
# print short usage
print("usage: %s [-h]" % sys.argv[0], end="")
def render_arg(arg):
if arg.action == "store":
if arg.nargs is None:
return " %s" % arg.dest
if isinstance(arg.nargs, int):
return " %s(x%d)" % (arg.dest, arg.nargs)
else:
return " %s%s" % (arg.dest, arg.nargs)
else:
return ""
for opt in self.opt:
print(" [%s%s]" % (', '.join(opt.names), render_arg(opt)), end="")
for pos in self.pos:
print(render_arg(pos), end="")
print()
if not full:
return
# print full information
print()
if self.description:
print(self.description)
if self.pos:
print("\npositional args:")
for pos in self.pos:
print(" %-16s%s" % (pos.names[0], pos.help))
print("\noptional args:")
print(" -h, --help show this message and exit")
for opt in self.opt:
print(" %-16s%s" % (', '.join(opt.names) + render_arg(opt), opt.help))
def parse_args(self, args=None):
return self._parse_args_impl(args, False)
def parse_known_args(self, args=None):
return self._parse_args_impl(args, True)
def _parse_args_impl(self, args, return_unknown):
if args is None:
args = sys.argv[1:]
else:
args = args[:]
try:
return self._parse_args(args, return_unknown)
except _ArgError as e:
self.usage(False)
print("error:", e)
sys.exit(2)
def _parse_args(self, args, return_unknown):
# add optional args with defaults
arg_dest = []
arg_vals = []
for opt in self.opt:
arg_dest.append(opt.dest)
arg_vals.append(opt.default)
# deal with unknown arguments, if needed
unknown = []
def consume_unknown():
while args and not args[0].startswith("-"):
unknown.append(args.pop(0))
# parse all args
parsed_pos = False
while args or not parsed_pos:
if args and args[0].startswith("-") and args[0] != "-" and args[0] != "--":
# optional arg
a = args.pop(0)
if a in ("-h", "--help"):
self.usage(True)
sys.exit(0)
found = False
for i, opt in enumerate(self.opt):
if a in opt.names:
arg_vals[i] = opt.parse(a, args)
found = True
break
if not found:
if return_unknown:
unknown.append(a)
consume_unknown()
else:
raise _ArgError("unknown option %s" % a)
else:
# positional arg
if parsed_pos:
if return_unknown:
unknown = unknown + args
break
else:
raise _ArgError("extra args: %s" % " ".join(args))
for pos in self.pos:
arg_dest.append(pos.dest)
arg_vals.append(pos.parse(pos.names[0], args))
parsed_pos = True
if return_unknown:
consume_unknown()
# build and return named tuple with arg values
values = namedtuple("args", arg_dest)(*arg_vals)
return (values, unknown) if return_unknown else values

151
upip/asyncio_slow.py Normal file
View File

@ -0,0 +1,151 @@
import time
import logging
log = logging.getLogger("asyncio")
# Workaround for not being able to subclass builtin types
class LoopStop(Exception):
pass
class InvalidStateError(Exception):
pass
# Object not matching any other object
_sentinel = []
class EventLoop:
def __init__(self):
self.q = []
def call_soon(self, c, *args):
self.q.append((c, args))
def call_later(self, delay, c, *args):
def _delayed(c, args, delay):
yield from sleep(delay)
self.call_soon(c, *args)
Task(_delayed(c, args, delay))
def run_forever(self):
while self.q:
c = self.q.pop(0)
try:
c[0](*c[1])
except LoopStop:
return
# I mean, forever
while True:
time.sleep(1)
def stop(self):
def _cb():
raise LoopStop
self.call_soon(_cb)
def run_until_complete(self, coro):
t = ensure_future(coro)
t.add_done_callback(lambda a: self.stop())
self.run_forever()
def close(self):
pass
_def_event_loop = EventLoop()
class Future:
def __init__(self, loop=_def_event_loop):
self.loop = loop
self.res = _sentinel
self.cbs = []
def result(self):
if self.res is _sentinel:
raise InvalidStateError
return self.res
def add_done_callback(self, fn):
if self.res is _sentinel:
self.cbs.append(fn)
else:
self.loop.call_soon(fn, self)
def set_result(self, val):
self.res = val
for f in self.cbs:
f(self)
class Task(Future):
def __init__(self, coro, loop=_def_event_loop):
super().__init__()
self.loop = loop
self.c = coro
# upstream asyncio forces task to be scheduled on instantiation
self.loop.call_soon(self)
def __call__(self):
try:
next(self.c)
self.loop.call_soon(self)
except StopIteration as e:
log.debug("Coro finished: %s", self.c)
self.set_result(None)
def get_event_loop():
return _def_event_loop
# Decorator
def coroutine(f):
return f
def ensure_future(coro):
if isinstance(coro, Future):
return coro
return Task(coro)
class _Wait(Future):
def __init__(self, n):
Future.__init__(self)
self.n = n
def _done(self):
self.n -= 1
log.debug("Wait: remaining tasks: %d", self.n)
if not self.n:
self.set_result(None)
def __call__(self):
pass
def wait(coro_list, loop=_def_event_loop):
w = _Wait(len(coro_list))
for c in coro_list:
t = ensure_future(c)
t.add_done_callback(lambda val: w._done())
return w
def sleep(secs):
t = time.time()
log.debug("Started sleep at: %s, targetting: %s", t, t + secs)
while time.time() < t + secs:
time.sleep(0.01)
yield
log.debug("Finished sleeping %ss", secs)

414
upip/base64.py Normal file
View File

@ -0,0 +1,414 @@
#! /usr/bin/env python3
"""RFC 3548: Base16, Base32, Base64 Data Encodings"""
# Modified 04-Oct-1995 by Jack Jansen to use binascii module
# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere
import re
import struct
import binascii
__all__ = [
# Legacy interface exports traditional RFC 1521 Base64 encodings
'encode', 'decode', 'encodebytes', 'decodebytes',
# Generalized interface for other encodings
'b64encode', 'b64decode', 'b32encode', 'b32decode',
'b16encode', 'b16decode',
# Standard Base64 encoding
'standard_b64encode', 'standard_b64decode',
# Some common Base64 alternatives. As referenced by RFC 3458, see thread
# starting at:
#
# http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
'urlsafe_b64encode', 'urlsafe_b64decode',
]
bytes_types = (bytes, bytearray) # Types acceptable as binary data
def _bytes_from_decode_data(s):
if isinstance(s, str):
try:
return s.encode('ascii')
# except UnicodeEncodeError:
except:
raise ValueError('string argument should contain only ASCII characters')
elif isinstance(s, bytes_types):
return s
else:
raise TypeError("argument should be bytes or ASCII string, not %s" % s.__class__.__name__)
# Base64 encoding/decoding uses binascii
def b64encode(s, altchars=None):
"""Encode a byte string using Base64.
s is the byte string to encode. Optional altchars must be a byte
string of length 2 which specifies an alternative alphabet for the
'+' and '/' characters. This allows an application to
e.g. generate url or filesystem safe Base64 strings.
The encoded byte string is returned.
"""
if not isinstance(s, bytes_types):
raise TypeError("expected bytes, not %s" % s.__class__.__name__)
# Strip off the trailing newline
encoded = binascii.b2a_base64(s)[:-1]
if altchars is not None:
if not isinstance(altchars, bytes_types):
raise TypeError("expected bytes, not %s"
% altchars.__class__.__name__)
assert len(altchars) == 2, repr(altchars)
return encoded.translate(bytes.maketrans(b'+/', altchars))
return encoded
def b64decode(s, altchars=None, validate=False):
"""Decode a Base64 encoded byte string.
s is the byte string to decode. Optional altchars must be a
string of length 2 which specifies the alternative alphabet used
instead of the '+' and '/' characters.
The decoded string is returned. A binascii.Error is raised if s is
incorrectly padded.
If validate is False (the default), non-base64-alphabet characters are
discarded prior to the padding check. If validate is True,
non-base64-alphabet characters in the input result in a binascii.Error.
"""
s = _bytes_from_decode_data(s)
if altchars is not None:
altchars = _bytes_from_decode_data(altchars)
assert len(altchars) == 2, repr(altchars)
s = s.translate(bytes.maketrans(altchars, b'+/'))
if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s):
raise binascii.Error('Non-base64 digit found')
return binascii.a2b_base64(s)
def standard_b64encode(s):
"""Encode a byte string using the standard Base64 alphabet.
s is the byte string to encode. The encoded byte string is returned.
"""
return b64encode(s)
def standard_b64decode(s):
"""Decode a byte string encoded with the standard Base64 alphabet.
s is the byte string to decode. The decoded byte string is
returned. binascii.Error is raised if the input is incorrectly
padded or if there are non-alphabet characters present in the
input.
"""
return b64decode(s)
#_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
#_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')
def urlsafe_b64encode(s):
"""Encode a byte string using a url-safe Base64 alphabet.
s is the byte string to encode. The encoded byte string is
returned. The alphabet uses '-' instead of '+' and '_' instead of
'/'.
"""
# return b64encode(s).translate(_urlsafe_encode_translation)
raise NotImplementedError()
def urlsafe_b64decode(s):
"""Decode a byte string encoded with the standard Base64 alphabet.
s is the byte string to decode. The decoded byte string is
returned. binascii.Error is raised if the input is incorrectly
padded or if there are non-alphabet characters present in the
input.
The alphabet uses '-' instead of '+' and '_' instead of '/'.
"""
# s = _bytes_from_decode_data(s)
# s = s.translate(_urlsafe_decode_translation)
# return b64decode(s)
raise NotImplementedError()
# Base32 encoding/decoding must be done in Python
_b32alphabet = {
0: b'A', 9: b'J', 18: b'S', 27: b'3',
1: b'B', 10: b'K', 19: b'T', 28: b'4',
2: b'C', 11: b'L', 20: b'U', 29: b'5',
3: b'D', 12: b'M', 21: b'V', 30: b'6',
4: b'E', 13: b'N', 22: b'W', 31: b'7',
5: b'F', 14: b'O', 23: b'X',
6: b'G', 15: b'P', 24: b'Y',
7: b'H', 16: b'Q', 25: b'Z',
8: b'I', 17: b'R', 26: b'2',
}
_b32tab = [v[0] for k, v in sorted(_b32alphabet.items())]
_b32rev = dict([(v[0], k) for k, v in _b32alphabet.items()])
def b32encode(s):
"""Encode a byte string using Base32.
s is the byte string to encode. The encoded byte string is returned.
"""
if not isinstance(s, bytes_types):
raise TypeError("expected bytes, not %s" % s.__class__.__name__)
quanta, leftover = divmod(len(s), 5)
# Pad the last quantum with zero bits if necessary
if leftover:
s = s + bytes(5 - leftover) # Don't use += !
quanta += 1
encoded = bytearray()
for i in range(quanta):
# c1 and c2 are 16 bits wide, c3 is 8 bits wide. The intent of this
# code is to process the 40 bits in units of 5 bits. So we take the 1
# leftover bit of c1 and tack it onto c2. Then we take the 2 leftover
# bits of c2 and tack them onto c3. The shifts and masks are intended
# to give us values of exactly 5 bits in width.
c1, c2, c3 = struct.unpack('!HHB', s[i*5:(i+1)*5])
c2 += (c1 & 1) << 16 # 17 bits wide
c3 += (c2 & 3) << 8 # 10 bits wide
encoded += bytes([_b32tab[c1 >> 11], # bits 1 - 5
_b32tab[(c1 >> 6) & 0x1f], # bits 6 - 10
_b32tab[(c1 >> 1) & 0x1f], # bits 11 - 15
_b32tab[c2 >> 12], # bits 16 - 20 (1 - 5)
_b32tab[(c2 >> 7) & 0x1f], # bits 21 - 25 (6 - 10)
_b32tab[(c2 >> 2) & 0x1f], # bits 26 - 30 (11 - 15)
_b32tab[c3 >> 5], # bits 31 - 35 (1 - 5)
_b32tab[c3 & 0x1f], # bits 36 - 40 (1 - 5)
])
# Adjust for any leftover partial quanta
if leftover == 1:
encoded = encoded[:-6] + b'======'
elif leftover == 2:
encoded = encoded[:-4] + b'===='
elif leftover == 3:
encoded = encoded[:-3] + b'==='
elif leftover == 4:
encoded = encoded[:-1] + b'='
return bytes(encoded)
def b32decode(s, casefold=False, map01=None):
"""Decode a Base32 encoded byte string.
s is the byte string to decode. Optional casefold is a flag
specifying whether a lowercase alphabet is acceptable as input.
For security purposes, the default is False.
RFC 3548 allows for optional mapping of the digit 0 (zero) to the
letter O (oh), and for optional mapping of the digit 1 (one) to
either the letter I (eye) or letter L (el). The optional argument
map01 when not None, specifies which letter the digit 1 should be
mapped to (when map01 is not None, the digit 0 is always mapped to
the letter O). For security purposes the default is None, so that
0 and 1 are not allowed in the input.
The decoded byte string is returned. binascii.Error is raised if
the input is incorrectly padded or if there are non-alphabet
characters present in the input.
"""
s = _bytes_from_decode_data(s)
quanta, leftover = divmod(len(s), 8)
if leftover:
raise binascii.Error('Incorrect padding')
# Handle section 2.4 zero and one mapping. The flag map01 will be either
# False, or the character to map the digit 1 (one) to. It should be
# either L (el) or I (eye).
if map01 is not None:
map01 = _bytes_from_decode_data(map01)
assert len(map01) == 1, repr(map01)
s = s.translate(bytes.maketrans(b'01', b'O' + map01))
if casefold:
s = s.upper()
# Strip off pad characters from the right. We need to count the pad
# characters because this will tell us how many null bytes to remove from
# the end of the decoded string.
padchars = s.find(b'=')
if padchars > 0:
padchars = len(s) - padchars
s = s[:-padchars]
else:
padchars = 0
# Now decode the full quanta
parts = []
acc = 0
shift = 35
for c in s:
val = _b32rev.get(c)
if val is None:
raise binascii.Error('Non-base32 digit found')
acc += _b32rev[c] << shift
shift -= 5
if shift < 0:
parts.append(binascii.unhexlify(bytes('%010x' % acc, "ascii")))
acc = 0
shift = 35
# Process the last, partial quanta
last = binascii.unhexlify(bytes('%010x' % acc, "ascii"))
if padchars == 0:
last = b'' # No characters
elif padchars == 1:
last = last[:-1]
elif padchars == 3:
last = last[:-2]
elif padchars == 4:
last = last[:-3]
elif padchars == 6:
last = last[:-4]
else:
raise binascii.Error('Incorrect padding')
parts.append(last)
return b''.join(parts)
# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
# lowercase. The RFC also recommends against accepting input case
# insensitively.
def b16encode(s):
"""Encode a byte string using Base16.
s is the byte string to encode. The encoded byte string is returned.
"""
if not isinstance(s, bytes_types):
raise TypeError("expected bytes, not %s" % s.__class__.__name__)
return binascii.hexlify(s).upper()
def b16decode(s, casefold=False):
"""Decode a Base16 encoded byte string.
s is the byte string to decode. Optional casefold is a flag
specifying whether a lowercase alphabet is acceptable as input.
For security purposes, the default is False.
The decoded byte string is returned. binascii.Error is raised if
s were incorrectly padded or if there are non-alphabet characters
present in the string.
"""
s = _bytes_from_decode_data(s)
if casefold:
s = s.upper()
if re.search(b'[^0-9A-F]', s):
raise binascii.Error('Non-base16 digit found')
return binascii.unhexlify(s)
# Legacy interface. This code could be cleaned up since I don't believe
# binascii has any line length limitations. It just doesn't seem worth it
# though. The files should be opened in binary mode.
MAXLINESIZE = 76 # Excluding the CRLF
MAXBINSIZE = (MAXLINESIZE//4)*3
def encode(input, output):
"""Encode a file; input and output are binary files."""
while True:
s = input.read(MAXBINSIZE)
if not s:
break
while len(s) < MAXBINSIZE:
ns = input.read(MAXBINSIZE-len(s))
if not ns:
break
s += ns
line = binascii.b2a_base64(s)
output.write(line)
def decode(input, output):
"""Decode a file; input and output are binary files."""
while True:
line = input.readline()
if not line:
break
s = binascii.a2b_base64(line)
output.write(s)
def encodebytes(s):
"""Encode a bytestring into a bytestring containing multiple lines
of base-64 data."""
if not isinstance(s, bytes_types):
raise TypeError("expected bytes, not %s" % s.__class__.__name__)
pieces = []
for i in range(0, len(s), MAXBINSIZE):
chunk = s[i : i + MAXBINSIZE]
pieces.append(binascii.b2a_base64(chunk))
return b"".join(pieces)
def encodestring(s):
"""Legacy alias of encodebytes()."""
import warnings
warnings.warn("encodestring() is a deprecated alias, use encodebytes()",
DeprecationWarning, 2)
return encodebytes(s)
def decodebytes(s):
"""Decode a bytestring of base-64 data into a bytestring."""
if not isinstance(s, bytes_types):
raise TypeError("expected bytes, not %s" % s.__class__.__name__)
return binascii.a2b_base64(s)
def decodestring(s):
"""Legacy alias of decodebytes()."""
import warnings
warnings.warn("decodestring() is a deprecated alias, use decodebytes()",
DeprecationWarning, 2)
return decodebytes(s)
# Usable as a script...
def main():
"""Small main program"""
import sys, getopt
try:
opts, args = getopt.getopt(sys.argv[1:], 'deut')
except getopt.error as msg:
sys.stdout = sys.stderr
print(msg)
print("""usage: %s [-d|-e|-u|-t] [file|-]
-d, -u: decode
-e: encode (default)
-t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0])
sys.exit(2)
func = encode
for o, a in opts:
if o == '-e': func = encode
if o == '-d': func = decode
if o == '-u': func = decode
if o == '-t': test(); return
if args and args[0] != '-':
with open(args[0], 'rb') as f:
func(f, sys.stdout.buffer)
else:
func(sys.stdin.buffer, sys.stdout.buffer)
def test():
s0 = b"Aladdin:open sesame"
print(repr(s0))
s1 = encodebytes(s0)
print(repr(s1))
s2 = decodebytes(s1)
print(repr(s2))
assert s0 == s2
if __name__ == '__main__':
main()

View File

@ -0,0 +1,38 @@
#
# This is validation script for "boom" tool https://github.com/tarekziade/boom
# To use it:
#
# boom -n1000 --post-hook=boom_uasyncio.validate <rest of boom args>
#
# Note that if you'll use other -n value, you should update NUM_REQS below
# to match.
#
NUM_REQS = 1000
seen = []
cnt = 0
def validate(resp):
global cnt
t = resp.text
l = t.split("\r\n", 1)[0]
no = int(l.split()[1])
seen.append(no)
c = t.count(l + "\r\n")
assert c == 400101, str(c)
assert t.endswith("=== END ===")
cnt += 1
if cnt == NUM_REQS:
seen.sort()
print
print seen
print
el = None
for i in seen:
if el is None:
el = i
else:
el += 1
assert i == el
return resp

View File

@ -0,0 +1,17 @@
#!/bin/sh
#
# This in one-shot scripts to test "light load" uasyncio HTTP server using
# Apache Bench (ab).
#
#python3.4.2 test_http_server_light.py &
#micropython -O test_http_server_light.py &
#python3.4.2 test_http_server_medium.py &
micropython -O -X heapsize=200wK test_http_server_medium.py &
sleep 1
ab -n10000 -c100 http://127.0.0.1:8081/
kill %1

View File

@ -0,0 +1,28 @@
#!/bin/sh
#
# This in one-shot scripts to test "heavy load" uasyncio HTTP server using
# Boom tool https://github.com/tarekziade/boom .
#
# Note that this script doesn't test performance, but rather test functional
# correctness of uasyncio server implementation, while serving large amounts
# of data (guaranteedly more than a socket buffer). Thus, this script should
# not be used for benchmarking.
#
if [ ! -d .venv-boom ]; then
virtualenv .venv-boom
. .venv-boom/bin/activate
# PyPI currently has 0.8 which is too old
#pip install boom
pip install git+https://github.com/tarekziade/boom
else
. .venv-boom/bin/activate
fi
micropython -X heapsize=300000000 -O test_http_server_heavy.py &
sleep 1
PYTHONPATH=. boom -n1000 -c30 http://localhost:8081 --post-hook=boom_uasyncio.validate
kill %1

View File

@ -0,0 +1,40 @@
import uasyncio as asyncio
import signal
import errno
cnt = 0
@asyncio.coroutine
def serve(reader, writer):
global cnt
#s = "Hello.\r\n"
s = "Hello. %07d\r\n" % cnt
cnt += 1
yield from reader.read()
yield from writer.awrite("HTTP/1.0 200 OK\r\n\r\n")
try:
yield from writer.awrite(s)
yield from writer.awrite(s * 100)
yield from writer.awrite(s * 400000)
yield from writer.awrite("=== END ===")
except OSError as e:
if e.args[0] == errno.EPIPE:
print("EPIPE")
elif e.args[0] == errno.ECONNRESET:
print("ECONNRESET")
else:
raise
finally:
yield from writer.aclose()
import logging
logging.basicConfig(level=logging.INFO)
#logging.basicConfig(level=logging.DEBUG)
signal.signal(signal.SIGPIPE, signal.SIG_IGN)
loop = asyncio.get_event_loop()
#mem_info()
loop.call_soon(asyncio.start_server(serve, "0.0.0.0", 8081, backlog=100))
loop.run_forever()
loop.close()

View File

@ -0,0 +1,21 @@
import uasyncio as asyncio
@asyncio.coroutine
def serve(reader, writer):
#print(reader, writer)
#print("================")
yield from reader.read(512)
yield from writer.awrite("HTTP/1.0 200 OK\r\n\r\nHello.\r\n")
yield from writer.aclose()
#print("Finished processing request")
import logging
#logging.basicConfig(level=logging.INFO)
logging.basicConfig(level=logging.DEBUG)
loop = asyncio.get_event_loop()
#mem_info()
loop.create_task(asyncio.start_server(serve, "127.0.0.1", 8081, backlog=100))
loop.run_forever()
loop.close()

View File

@ -0,0 +1,22 @@
import uasyncio as asyncio
resp = "HTTP/1.0 200 OK\r\n\r\n" + "Hello.\r\n" * 1500
@asyncio.coroutine
def serve(reader, writer):
#print(reader, writer)
#print("================")
yield from reader.read(512)
yield from writer.awrite(resp)
yield from writer.aclose()
#print("Finished processing request")
import logging
#logging.basicConfig(level=logging.INFO)
logging.basicConfig(level=logging.DEBUG)
loop = asyncio.get_event_loop(80)
#mem_info()
loop.create_task(asyncio.start_server(serve, "127.0.0.1", 8081, backlog=100))
loop.run_forever()
loop.close()

113
upip/binascii.py Normal file
View File

@ -0,0 +1,113 @@
from ubinascii import *
if not "unhexlify" in globals():
def unhexlify(data):
if len(data) % 2 != 0:
raise ValueError("Odd-length string")
return bytes([ int(data[i:i+2], 16) for i in range(0, len(data), 2) ])
b2a_hex = hexlify
a2b_hex = unhexlify
# ____________________________________________________________
PAD = '='
table_a2b_base64 = [
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1, # Note PAD->-1 here
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
]
def _transform(n):
if n == -1:
return '\xff'
else:
return chr(n)
table_a2b_base64 = ''.join(map(_transform, table_a2b_base64))
assert len(table_a2b_base64) == 256
def a2b_base64(ascii):
"Decode a line of base64 data."
res = []
quad_pos = 0
leftchar = 0
leftbits = 0
last_char_was_a_pad = False
for c in ascii:
c = chr(c)
if c == PAD:
if quad_pos > 2 or (quad_pos == 2 and last_char_was_a_pad):
break # stop on 'xxx=' or on 'xx=='
last_char_was_a_pad = True
else:
n = ord(table_a2b_base64[ord(c)])
if n == 0xff:
continue # ignore strange characters
#
# Shift it in on the low end, and see if there's
# a byte ready for output.
quad_pos = (quad_pos + 1) & 3
leftchar = (leftchar << 6) | n
leftbits += 6
#
if leftbits >= 8:
leftbits -= 8
res.append((leftchar >> leftbits).to_bytes(1, 'big'))
leftchar &= ((1 << leftbits) - 1)
#
last_char_was_a_pad = False
else:
if leftbits != 0:
raise Exception("Incorrect padding")
return b''.join(res)
# ____________________________________________________________
table_b2a_base64 = (
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
def b2a_base64(bin):
"Base64-code line of data."
newlength = (len(bin) + 2) // 3
newlength = newlength * 4 + 1
res = []
leftchar = 0
leftbits = 0
for c in bin:
# Shift into our buffer, and output any 6bits ready
leftchar = (leftchar << 8) | c
leftbits += 8
res.append(table_b2a_base64[(leftchar >> (leftbits-6)) & 0x3f])
leftbits -= 6
if leftbits >= 6:
res.append(table_b2a_base64[(leftchar >> (leftbits-6)) & 0x3f])
leftbits -= 6
#
if leftbits == 2:
res.append(table_b2a_base64[(leftchar & 3) << 4])
res.append(PAD)
res.append(PAD)
elif leftbits == 4:
res.append(table_b2a_base64[(leftchar & 0xf) << 2])
res.append(PAD)
res.append('\n')
return ''.join(res).encode('ascii')

92
upip/bisect.py Normal file
View File

@ -0,0 +1,92 @@
"""Bisection algorithms."""
def insort_right(a, x, lo=0, hi=None):
"""Insert item x in list a, and keep it sorted assuming a is sorted.
If x is already in a, insert it to the right of the rightmost x.
Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""
if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
while lo < hi:
mid = (lo+hi)//2
if x < a[mid]: hi = mid
else: lo = mid+1
a.insert(lo, x)
insort = insort_right # backward compatibility
def bisect_right(a, x, lo=0, hi=None):
"""Return the index where to insert item x in list a, assuming a is sorted.
The return value i is such that all e in a[:i] have e <= x, and all e in
a[i:] have e > x. So if x already appears in the list, a.insert(x) will
insert just after the rightmost x already there.
Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""
if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
while lo < hi:
mid = (lo+hi)//2
if x < a[mid]: hi = mid
else: lo = mid+1
return lo
bisect = bisect_right # backward compatibility
def insort_left(a, x, lo=0, hi=None):
"""Insert item x in list a, and keep it sorted assuming a is sorted.
If x is already in a, insert it to the left of the leftmost x.
Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""
if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
while lo < hi:
mid = (lo+hi)//2
if a[mid] < x: lo = mid+1
else: hi = mid
a.insert(lo, x)
def bisect_left(a, x, lo=0, hi=None):
"""Return the index where to insert item x in list a, assuming a is sorted.
The return value i is such that all e in a[:i] have e < x, and all e in
a[i:] have e >= x. So if x already appears in the list, a.insert(x) will
insert just before the leftmost x already there.
Optional args lo (default 0) and hi (default len(a)) bound the
slice of a to be searched.
"""
if lo < 0:
raise ValueError('lo must be non-negative')
if hi is None:
hi = len(a)
while lo < hi:
mid = (lo+hi)//2
if a[mid] < x: lo = mid+1
else: hi = mid
return lo
# Overwrite above definitions with a fast C implementation
try:
from _bisect import *
except ImportError:
pass

1046
upip/cgi.py Normal file

File diff suppressed because it is too large Load Diff

337
upip/cmd.py Normal file
View File

@ -0,0 +1,337 @@
"""A generic class to build line-oriented command interpreters.
Interpreters constructed with this class obey the following conventions:
1. End of file on input is processed as the command 'EOF'.
2. A command is parsed out of each line by collecting the prefix composed
of characters in the identchars member.
3. A command `foo' is dispatched to a method 'do_foo()'; the do_ method
is passed a single argument consisting of the remainder of the line.
4. Typing an empty line repeats the last command. (Actually, it calls the
method `emptyline', which may be overridden in a subclass.)
5. There is a predefined `help' method. Given an argument `topic', it
calls the command `help_topic'. With no arguments, it lists all topics
with defined help_ functions, broken into up to three topics; documented
commands, miscellaneous help topics, and undocumented commands.
6. The command '?' is a synonym for `help'. The command '!' is a synonym
for `shell', if a do_shell method exists.
7. If completion is enabled, completing commands will be done automatically,
and completing of commands args is done by calling complete_foo() with
arguments text, line, begidx, endidx. text is string we are matching
against, all returned matches must begin with it. line is the current
input line (lstripped), begidx and endidx are the beginning and end
indexes of the text being matched, which could be used to provide
different completion depending upon which position the argument is in.
The `default' method may be overridden to intercept commands for which there
is no do_ method.
The `completedefault' method may be overridden to intercept completions for
commands that have no complete_ method.
The data member `self.ruler' sets the character used to draw separator lines
in the help messages. If empty, no ruler line is drawn. It defaults to "=".
If the value of `self.intro' is nonempty when the cmdloop method is called,
it is printed out on interpreter startup. This value may be overridden
via an optional argument to the cmdloop() method.
The data members `self.doc_header', `self.misc_header', and
`self.undoc_header' set the headers used for the help function's
listings of documented functions, miscellaneous topics, and undocumented
functions respectively.
----------------------------------------------------------------------------
This is a copy of python's Cmd, but leaves out features that aren't relevant
or can't currently be implemented for MicroPython.
One of the notable deviations is that since MicroPython strips doc strings,
this means that that help by doc string feature doesn't work.
completions have also been stripped out.
"""
#import string, sys
import sys # MiroPython doesn't yet have a string module
__all__ = ["Cmd"]
PROMPT = '(Cmd) '
#IDENTCHARS = string.ascii_letters + string.digits + '_'
IDENTCHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
class Cmd:
"""A simple framework for writing line-oriented command interpreters.
These are often useful for test harnesses, administrative tools, and
prototypes that will later be wrapped in a more sophisticated interface.
A Cmd instance or subclass instance is a line-oriented interpreter
framework. There is no good reason to instantiate Cmd itself; rather,
it's useful as a superclass of an interpreter class you define yourself
in order to inherit Cmd's methods and encapsulate action methods.
"""
prompt = PROMPT
identchars = IDENTCHARS
ruler = '='
lastcmd = ''
intro = None
doc_leader = ""
doc_header = "Documented commands (type help <topic>):"
misc_header = "Miscellaneous help topics:"
undoc_header = "Undocumented commands:"
nohelp = "*** No help on %s"
use_rawinput = 1
def __init__(self, stdin=None, stdout=None):
"""Instantiate a line-oriented interpreter framework.
The optional arguments stdin and stdout
specify alternate input and output file objects; if not specified,
sys.stdin and sys.stdout are used.
"""
if stdin is not None:
self.stdin = stdin
else:
self.stdin = sys.stdin
if stdout is not None:
self.stdout = stdout
else:
self.stdout = sys.stdout
self.cmdqueue = []
def cmdloop(self, intro=None):
"""Repeatedly issue a prompt, accept input, parse an initial prefix
off the received input, and dispatch to action methods, passing them
the remainder of the line as argument.
"""
self.preloop()
try:
if intro is not None:
self.intro = intro
if self.intro:
self.stdout.write(str(self.intro)+"\n")
stop = None
while not stop:
if self.cmdqueue:
line = self.cmdqueue.pop(0)
else:
if self.use_rawinput:
try:
line = input(self.prompt)
except EOFError:
line = 'EOF'
else:
self.stdout.write(self.prompt)
self.stdout.flush()
line = self.stdin.readline()
if not len(line):
line = 'EOF'
else:
line = line.rstrip('\r\n')
line = self.precmd(line)
stop = self.onecmd(line)
stop = self.postcmd(stop, line)
self.postloop()
finally:
pass
def precmd(self, line):
"""Hook method executed just before the command line is
interpreted, but after the input prompt is generated and issued.
"""
return line
def postcmd(self, stop, line):
"""Hook method executed just after a command dispatch is finished."""
return stop
def preloop(self):
"""Hook method executed once when the cmdloop() method is called."""
pass
def postloop(self):
"""Hook method executed once when the cmdloop() method is about to
return.
"""
pass
def parseline(self, line):
"""Parse the line into a command name and a string containing
the arguments. Returns a tuple containing (command, args, line).
'command' and 'args' may be None if the line couldn't be parsed.
"""
line = line.strip()
if not line:
return None, None, line
elif line[0] == '?':
line = 'help ' + line[1:]
elif line[0] == '!':
if hasattr(self, 'do_shell'):
line = 'shell ' + line[1:]
else:
return None, None, line
i, n = 0, len(line)
while i < n and line[i] in self.identchars: i = i+1
cmd, arg = line[:i], line[i:].strip()
return cmd, arg, line
def onecmd(self, line):
"""Interpret the argument as though it had been typed in response
to the prompt.
This may be overridden, but should not normally need to be;
see the precmd() and postcmd() methods for useful execution hooks.
The return value is a flag indicating whether interpretation of
commands by the interpreter should stop.
"""
cmd, arg, line = self.parseline(line)
if not line:
return self.emptyline()
if cmd is None:
return self.default(line)
self.lastcmd = line
if line == 'EOF' :
self.lastcmd = ''
if cmd == '':
return self.default(line)
else:
try:
func = getattr(self, 'do_' + cmd)
except AttributeError:
return self.default(line)
return func(arg)
def emptyline(self):
"""Called when an empty line is entered in response to the prompt.
If this method is not overridden, it repeats the last nonempty
command entered.
"""
if self.lastcmd:
return self.onecmd(self.lastcmd)
def default(self, line):
"""Called on an input line when the command prefix is not recognized.
If this method is not overridden, it prints an error message and
returns.
"""
self.stdout.write('*** Unknown syntax: %s\n'%line)
def get_names(self):
# This method used to pull in base class attributes
# at a time dir() didn't do it yet.
return dir(self.__class__)
def do_help(self, arg):
'List available commands with "help" or detailed help with "help cmd".'
if arg:
# XXX check arg syntax
try:
func = getattr(self, 'help_' + arg)
except AttributeError:
self.stdout.write("%s\n"%str(self.nohelp % (arg,)))
return
func()
else:
names = self.get_names()
cmds_doc = []
cmds_undoc = []
help = {}
for name in names:
if name[:5] == 'help_':
help[name[5:]]=1
names.sort()
# There can be duplicates if routines overridden
prevname = ''
for name in names:
if name[:3] == 'do_':
if name == prevname:
continue
prevname = name
cmd=name[3:]
if cmd in help:
cmds_doc.append(cmd)
del help[cmd]
else:
cmds_undoc.append(cmd)
self.stdout.write("%s\n"%str(self.doc_leader))
self.print_topics(self.doc_header, cmds_doc, 15,80)
self.print_topics(self.misc_header, list(help.keys()),15,80)
self.print_topics(self.undoc_header, cmds_undoc, 15,80)
def print_topics(self, header, cmds, cmdlen, maxcol):
if cmds:
self.stdout.write("%s\n"%str(header))
if self.ruler:
self.stdout.write("%s\n"%str(self.ruler * len(header)))
self.columnize(cmds, maxcol-1)
self.stdout.write("\n")
def columnize(self, list, displaywidth=80):
"""Display a list of strings as a compact set of columns.
Each column is only as wide as necessary.
Columns are separated by two spaces (one was not legible enough).
"""
if not list:
self.stdout.write("<empty>\n")
return
nonstrings = [i for i in range(len(list))
if not isinstance(list[i], str)]
if nonstrings:
raise TypeError("list[i] not a string for i in %s"
% ", ".join(map(str, nonstrings)))
size = len(list)
if size == 1:
self.stdout.write('%s\n'%str(list[0]))
return
# Try every row count from 1 upwards
for nrows in range(1, len(list)):
ncols = (size+nrows-1) // nrows
colwidths = []
totwidth = -2
for col in range(ncols):
colwidth = 0
for row in range(nrows):
i = row + nrows*col
if i >= size:
break
x = list[i]
colwidth = max(colwidth, len(x))
colwidths.append(colwidth)
totwidth += colwidth + 2
if totwidth > displaywidth:
break
if totwidth <= displaywidth:
break
else:
nrows = len(list)
ncols = 1
colwidths = [0]
for row in range(nrows):
texts = []
for col in range(ncols):
i = row + nrows*col
if i >= size:
x = ""
else:
x = list[i]
texts.append(x)
while texts and not texts[-1]:
del texts[-1]
for col in range(len(texts)):
#texts[col] = texts[col].ljust(colwidths[col])
texts[col] = '%-*s' % (colwidths[col], texts[col])
self.stdout.write("%s\n"%str(" ".join(texts)))

View File

@ -0,0 +1,16 @@
# Should be reimplemented for MicroPython
# Reason:
# CPython implementation brings in metaclasses and other bloat.
# This is going to be just import-all for other modules in a namespace package
from ucollections import *
try:
from .defaultdict import defaultdict
except ImportError:
pass
try:
from .deque import deque
except ImportError:
pass
class MutableMapping:
pass

View File

@ -0,0 +1,36 @@
class defaultdict:
@staticmethod
def __new__(cls, default_factory=None, **kwargs):
# Some code (e.g. urllib.urlparse) expects that basic defaultdict
# functionality will be available to subclasses without them
# calling __init__().
self = super(defaultdict, cls).__new__(cls)
self.d = {}
return self
def __init__(self, default_factory=None, **kwargs):
self.d = kwargs
self.default_factory = default_factory
def __getitem__(self, key):
try:
return self.d[key]
except KeyError:
v = self.__missing__(key)
self.d[key] = v
return v
def __setitem__(self, key, v):
self.d[key] = v
def __delitem__(self, key):
del self.d[key]
def __contains__(self, key):
return key in self.d
def __missing__(self, key):
if self.default_factory is None:
raise KeyError(key)
return self.default_factory()

37
upip/collections/deque.py Normal file
View File

@ -0,0 +1,37 @@
class deque:
def __init__(self, iterable=None):
if iterable is None:
self.q = []
else:
self.q = list(iterable)
def popleft(self):
return self.q.pop(0)
def popright(self):
return self.q.pop()
def pop(self):
return self.q.pop()
def append(self, a):
self.q.append(a)
def appendleft(self, a):
self.q.insert(0, a)
def extend(self, a):
self.q.extend(a)
def __len__(self):
return len(self.q)
def __bool__(self):
return bool(self.q)
def __iter__(self):
yield from self.q
def __str__(self):
return 'deque({})'.format(self.q)

View File

166
upip/contextlib.py Normal file
View File

@ -0,0 +1,166 @@
"""Utilities for with-statement contexts. See PEP 343.
Original source code: https://hg.python.org/cpython/file/3.4/Lib/contextlib.py
Not implemented:
- redirect_stdout;
"""
import sys
from collections import deque
from ucontextlib import *
class closing(object):
"""Context to automatically close something at the end of a block.
Code like this:
with closing(<module>.open(<arguments>)) as f:
<block>
is equivalent to this:
f = <module>.open(<arguments>)
try:
<block>
finally:
f.close()
"""
def __init__(self, thing):
self.thing = thing
def __enter__(self):
return self.thing
def __exit__(self, *exc_info):
self.thing.close()
class suppress:
"""Context manager to suppress specified exceptions
After the exception is suppressed, execution proceeds with the next
statement following the with statement.
with suppress(FileNotFoundError):
os.remove(somefile)
# Execution still resumes here if the file was already removed
"""
def __init__(self, *exceptions):
self._exceptions = exceptions
def __enter__(self):
pass
def __exit__(self, exctype, excinst, exctb):
# Unlike isinstance and issubclass, CPython exception handling
# currently only looks at the concrete type hierarchy (ignoring
# the instance and subclass checking hooks). While Guido considers
# that a bug rather than a feature, it's a fairly hard one to fix
# due to various internal implementation details. suppress provides
# the simpler issubclass based semantics, rather than trying to
# exactly reproduce the limitations of the CPython interpreter.
#
# See http://bugs.python.org/issue12029 for more details
return exctype is not None and issubclass(exctype, self._exceptions)
# Inspired by discussions on http://bugs.python.org/issue13585
class ExitStack(object):
"""Context manager for dynamic management of a stack of exit callbacks
For example:
with ExitStack() as stack:
files = [stack.enter_context(open(fname)) for fname in filenames]
# All opened files will automatically be closed at the end of
# the with statement, even if attempts to open files later
# in the list raise an exception
"""
def __init__(self):
self._exit_callbacks = deque()
def pop_all(self):
"""Preserve the context stack by transferring it to a new instance"""
new_stack = type(self)()
new_stack._exit_callbacks = self._exit_callbacks
self._exit_callbacks = deque()
return new_stack
def _push_cm_exit(self, cm, cm_exit):
"""Helper to correctly register callbacks to __exit__ methods"""
def _exit_wrapper(*exc_details):
return cm_exit(cm, *exc_details)
self.push(_exit_wrapper)
def push(self, exit):
"""Registers a callback with the standard __exit__ method signature
Can suppress exceptions the same way __exit__ methods can.
Also accepts any object with an __exit__ method (registering a call
to the method instead of the object itself)
"""
# We use an unbound method rather than a bound method to follow
# the standard lookup behaviour for special methods
_cb_type = type(exit)
try:
exit_method = _cb_type.__exit__
except AttributeError:
# Not a context manager, so assume its a callable
self._exit_callbacks.append(exit)
else:
self._push_cm_exit(exit, exit_method)
return exit # Allow use as a decorator
def callback(self, callback, *args, **kwds):
"""Registers an arbitrary callback and arguments.
Cannot suppress exceptions.
"""
def _exit_wrapper(exc_type, exc, tb):
callback(*args, **kwds)
self.push(_exit_wrapper)
return callback # Allow use as a decorator
def enter_context(self, cm):
"""Enters the supplied context manager
If successful, also pushes its __exit__ method as a callback and
returns the result of the __enter__ method.
"""
# We look up the special methods on the type to match the with statement
_cm_type = type(cm)
_exit = _cm_type.__exit__
result = _cm_type.__enter__(cm)
self._push_cm_exit(cm, _exit)
return result
def close(self):
"""Immediately unwind the context stack"""
self.__exit__(None, None, None)
def __enter__(self):
return self
def __exit__(self, *exc_details):
received_exc = exc_details[0] is not None
# Callbacks are invoked in LIFO order to match the behaviour of
# nested context managers
suppressed_exc = False
pending_raise = False
while self._exit_callbacks:
cb = self._exit_callbacks.pop()
try:
if cb(*exc_details):
suppressed_exc = True
pending_raise = False
exc_details = (None, None, None)
except:
exc_details = sys.exc_info()
pending_raise = True
if pending_raise:
raise exc_details[1]
return received_exc and suppressed_exc

328
upip/copy.py Normal file
View File

@ -0,0 +1,328 @@
"""Generic (shallow and deep) copying operations.
Interface summary:
import copy
x = copy.copy(y) # make a shallow copy of y
x = copy.deepcopy(y) # make a deep copy of y
For module specific errors, copy.Error is raised.
The difference between shallow and deep copying is only relevant for
compound objects (objects that contain other objects, like lists or
class instances).
- A shallow copy constructs a new compound object and then (to the
extent possible) inserts *the same objects* into it that the
original contains.
- A deep copy constructs a new compound object and then, recursively,
inserts *copies* into it of the objects found in the original.
Two problems often exist with deep copy operations that don't exist
with shallow copy operations:
a) recursive objects (compound objects that, directly or indirectly,
contain a reference to themselves) may cause a recursive loop
b) because deep copy copies *everything* it may copy too much, e.g.
administrative data structures that should be shared even between
copies
Python's deep copy operation avoids these problems by:
a) keeping a table of objects already copied during the current
copying pass
b) letting user-defined classes override the copying operation or the
set of components copied
This version does not copy types like module, class, function, method,
nor stack trace, stack frame, nor file, socket, window, nor array, nor
any similar types.
Classes can use the same interfaces to control copying that they use
to control pickling: they can define methods called __getinitargs__(),
__getstate__() and __setstate__(). See the documentation for module
"pickle" for information on these methods.
"""
import types
#import weakref
#from copyreg import dispatch_table
#import builtins
class Error(Exception):
pass
error = Error # backward compatibility
try:
from org.python.core import PyStringMap
except ImportError:
PyStringMap = None
__all__ = ["Error", "copy", "deepcopy"]
def copy(x):
"""Shallow copy operation on arbitrary Python objects.
See the module's __doc__ string for more info.
"""
cls = type(x)
copier = _copy_dispatch.get(cls)
if copier:
return copier(x)
copier = getattr(cls, "__copy__", None)
if copier:
return copier(x)
raise Error("un(shallow)copyable object of type %s" % cls)
dispatch_table = {}
reductor = dispatch_table.get(cls)
if reductor:
rv = reductor(x)
else:
reductor = getattr(x, "__reduce_ex__", None)
if reductor:
rv = reductor(2)
else:
reductor = getattr(x, "__reduce__", None)
if reductor:
rv = reductor()
else:
raise Error("un(shallow)copyable object of type %s" % cls)
return _reconstruct(x, rv, 0)
_copy_dispatch = d = {}
def _copy_immutable(x):
return x
for t in (type(None), int, float, bool, str, tuple,
type, range,
types.BuiltinFunctionType, type(Ellipsis),
types.FunctionType):
d[t] = _copy_immutable
t = getattr(types, "CodeType", None)
if t is not None:
d[t] = _copy_immutable
#for name in ("complex", "unicode"):
# t = getattr(builtins, name, None)
# if t is not None:
# d[t] = _copy_immutable
def _copy_with_constructor(x):
return type(x)(x)
for t in (list, dict, set):
d[t] = _copy_with_constructor
def _copy_with_copy_method(x):
return x.copy()
if PyStringMap is not None:
d[PyStringMap] = _copy_with_copy_method
del d
def deepcopy(x, memo=None, _nil=[]):
"""Deep copy operation on arbitrary Python objects.
See the module's __doc__ string for more info.
"""
if memo is None:
memo = {}
d = id(x)
y = memo.get(d, _nil)
if y is not _nil:
return y
cls = type(x)
copier = _deepcopy_dispatch.get(cls)
if copier:
y = copier(x, memo)
else:
try:
issc = issubclass(cls, type)
except TypeError: # cls is not a class (old Boost; see SF #502085)
issc = 0
if issc:
y = _deepcopy_atomic(x, memo)
else:
copier = getattr(x, "__deepcopy__", None)
if copier:
y = copier(memo)
else:
reductor = dispatch_table.get(cls)
if reductor:
rv = reductor(x)
else:
reductor = getattr(x, "__reduce_ex__", None)
if reductor:
rv = reductor(2)
else:
reductor = getattr(x, "__reduce__", None)
if reductor:
rv = reductor()
else:
raise Error(
"un(deep)copyable object of type %s" % cls)
y = _reconstruct(x, rv, 1, memo)
# If is its own copy, don't memoize.
if y is not x:
memo[d] = y
_keep_alive(x, memo) # Make sure x lives at least as long as d
return y
_deepcopy_dispatch = d = {}
def _deepcopy_atomic(x, memo):
return x
d[type(None)] = _deepcopy_atomic
d[type(Ellipsis)] = _deepcopy_atomic
d[int] = _deepcopy_atomic
d[float] = _deepcopy_atomic
d[bool] = _deepcopy_atomic
try:
d[complex] = _deepcopy_atomic
except NameError:
pass
d[bytes] = _deepcopy_atomic
d[str] = _deepcopy_atomic
try:
d[types.CodeType] = _deepcopy_atomic
except AttributeError:
pass
d[type] = _deepcopy_atomic
d[range] = _deepcopy_atomic
d[types.BuiltinFunctionType] = _deepcopy_atomic
d[types.FunctionType] = _deepcopy_atomic
#d[weakref.ref] = _deepcopy_atomic
def _deepcopy_list(x, memo):
y = []
memo[id(x)] = y
for a in x:
y.append(deepcopy(a, memo))
return y
d[list] = _deepcopy_list
def _deepcopy_tuple(x, memo):
y = []
for a in x:
y.append(deepcopy(a, memo))
# We're not going to put the tuple in the memo, but it's still important we
# check for it, in case the tuple contains recursive mutable structures.
try:
return memo[id(x)]
except KeyError:
pass
for i in range(len(x)):
if x[i] is not y[i]:
y = tuple(y)
break
else:
y = x
return y
d[tuple] = _deepcopy_tuple
def _deepcopy_dict(x, memo):
y = {}
memo[id(x)] = y
for key, value in x.items():
y[deepcopy(key, memo)] = deepcopy(value, memo)
return y
d[dict] = _deepcopy_dict
if PyStringMap is not None:
d[PyStringMap] = _deepcopy_dict
def _deepcopy_method(x, memo): # Copy instance methods
return type(x)(x.__func__, deepcopy(x.__self__, memo))
_deepcopy_dispatch[types.MethodType] = _deepcopy_method
def _keep_alive(x, memo):
"""Keeps a reference to the object x in the memo.
Because we remember objects by their id, we have
to assure that possibly temporary objects are kept
alive by referencing them.
We store a reference at the id of the memo, which should
normally not be used unless someone tries to deepcopy
the memo itself...
"""
try:
memo[id(memo)].append(x)
except KeyError:
# aha, this is the first one :-)
memo[id(memo)]=[x]
def _reconstruct(x, info, deep, memo=None):
if isinstance(info, str):
return x
assert isinstance(info, tuple)
if memo is None:
memo = {}
n = len(info)
assert n in (2, 3, 4, 5)
callable, args = info[:2]
if n > 2:
state = info[2]
else:
state = {}
if n > 3:
listiter = info[3]
else:
listiter = None
if n > 4:
dictiter = info[4]
else:
dictiter = None
if deep:
args = deepcopy(args, memo)
y = callable(*args)
memo[id(x)] = y
if state:
if deep:
state = deepcopy(state, memo)
if hasattr(y, '__setstate__'):
y.__setstate__(state)
else:
if isinstance(state, tuple) and len(state) == 2:
state, slotstate = state
else:
slotstate = None
if state is not None:
y.__dict__.update(state)
if slotstate is not None:
for key, value in slotstate.items():
setattr(y, key, value)
if listiter is not None:
for item in listiter:
if deep:
item = deepcopy(item, memo)
y.append(item)
if dictiter is not None:
for key, value in dictiter:
if deep:
key = deepcopy(key, memo)
value = deepcopy(value, memo)
y[key] = value
return y
del d
del types
# Helper for instance creation without calling __init__
class _EmptyClass:
pass

99
upip/curses/ascii.py Normal file
View File

@ -0,0 +1,99 @@
"""Constants and membership tests for ASCII characters"""
NUL = 0x00 # ^@
SOH = 0x01 # ^A
STX = 0x02 # ^B
ETX = 0x03 # ^C
EOT = 0x04 # ^D
ENQ = 0x05 # ^E
ACK = 0x06 # ^F
BEL = 0x07 # ^G
BS = 0x08 # ^H
TAB = 0x09 # ^I
HT = 0x09 # ^I
LF = 0x0a # ^J
NL = 0x0a # ^J
VT = 0x0b # ^K
FF = 0x0c # ^L
CR = 0x0d # ^M
SO = 0x0e # ^N
SI = 0x0f # ^O
DLE = 0x10 # ^P
DC1 = 0x11 # ^Q
DC2 = 0x12 # ^R
DC3 = 0x13 # ^S
DC4 = 0x14 # ^T
NAK = 0x15 # ^U
SYN = 0x16 # ^V
ETB = 0x17 # ^W
CAN = 0x18 # ^X
EM = 0x19 # ^Y
SUB = 0x1a # ^Z
ESC = 0x1b # ^[
FS = 0x1c # ^\
GS = 0x1d # ^]
RS = 0x1e # ^^
US = 0x1f # ^_
SP = 0x20 # space
DEL = 0x7f # delete
controlnames = [
"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
"BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
"DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
"CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US",
"SP"
]
def _ctoi(c):
if type(c) == type(""):
return ord(c)
else:
return c
def isalnum(c): return isalpha(c) or isdigit(c)
def isalpha(c): return isupper(c) or islower(c)
def isascii(c): return _ctoi(c) <= 127 # ?
def isblank(c): return _ctoi(c) in (8,32)
def iscntrl(c): return _ctoi(c) <= 31
def isdigit(c): return _ctoi(c) >= 48 and _ctoi(c) <= 57
def isgraph(c): return _ctoi(c) >= 33 and _ctoi(c) <= 126
def islower(c): return _ctoi(c) >= 97 and _ctoi(c) <= 122
def isprint(c): return _ctoi(c) >= 32 and _ctoi(c) <= 126
def ispunct(c): return _ctoi(c) != 32 and not isalnum(c)
def isspace(c): return _ctoi(c) in (9, 10, 11, 12, 13, 32)
def isupper(c): return _ctoi(c) >= 65 and _ctoi(c) <= 90
def isxdigit(c): return isdigit(c) or \
(_ctoi(c) >= 65 and _ctoi(c) <= 70) or (_ctoi(c) >= 97 and _ctoi(c) <= 102)
def isctrl(c): return _ctoi(c) < 32
def ismeta(c): return _ctoi(c) > 127
def ascii(c):
if type(c) == type(""):
return chr(_ctoi(c) & 0x7f)
else:
return _ctoi(c) & 0x7f
def ctrl(c):
if type(c) == type(""):
return chr(_ctoi(c) & 0x1f)
else:
return _ctoi(c) & 0x1f
def alt(c):
if type(c) == type(""):
return chr(_ctoi(c) | 0x80)
else:
return _ctoi(c) | 0x80
def unctrl(c):
bits = _ctoi(c)
if bits == 0x7f:
rep = "^?"
elif isprint(bits & 0x7f):
rep = chr(bits & 0x7f)
else:
rep = "^" + chr(((bits & 0x7f) | 0x20) + 0x20)
if bits & 0x80:
return "!" + rep
return rep

2138
upip/datetime.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,221 @@
""" Routines for manipulating RFC2047 encoded words.
This is currently a package-private API, but will be considered for promotion
to a public API if there is demand.
"""
# An ecoded word looks like this:
#
# =?charset[*lang]?cte?encoded_string?=
#
# for more information about charset see the charset module. Here it is one
# of the preferred MIME charset names (hopefully; you never know when parsing).
# cte (Content Transfer Encoding) is either 'q' or 'b' (ignoring case). In
# theory other letters could be used for other encodings, but in practice this
# (almost?) never happens. There could be a public API for adding entries
# to the CTE tables, but YAGNI for now. 'q' is Quoted Printable, 'b' is
# Base64. The meaning of encoded_string should be obvious. 'lang' is optional
# as indicated by the brackets (they are not part of the syntax) but is almost
# never encountered in practice.
#
# The general interface for a CTE decoder is that it takes the encoded_string
# as its argument, and returns a tuple (cte_decoded_string, defects). The
# cte_decoded_string is the original binary that was encoded using the
# specified cte. 'defects' is a list of MessageDefect instances indicating any
# problems encountered during conversion. 'charset' and 'lang' are the
# corresponding strings extracted from the EW, case preserved.
#
# The general interface for a CTE encoder is that it takes a binary sequence
# as input and returns the cte_encoded_string, which is an ascii-only string.
#
# Each decoder must also supply a length function that takes the binary
# sequence as its argument and returns the length of the resulting encoded
# string.
#
# The main API functions for the module are decode, which calls the decoder
# referenced by the cte specifier, and encode, which adds the appropriate
# RFC 2047 "chrome" to the encoded string, and can optionally automatically
# select the shortest possible encoding. See their docstrings below for
# details.
import re
import base64
import binascii
import functools
from string import ascii_letters, digits
from email import errors
__all__ = ['decode_q',
'encode_q',
'decode_b',
'encode_b',
'len_q',
'len_b',
'decode',
'encode',
]
#
# Quoted Printable
#
# regex based decoder.
_q_byte_subber = functools.partial(re.compile(br'=([a-fA-F0-9]{2})').sub,
lambda m: bytes([int(m.group(1), 16)]))
def decode_q(encoded):
encoded = encoded.replace(b'_', b' ')
return _q_byte_subber(encoded), []
# dict mapping bytes to their encoded form
class _QByteMap(dict):
safe = b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii')
def __missing__(self, key):
if key in self.safe:
self[key] = chr(key)
else:
self[key] = "={:02X}".format(key)
return self[key]
_q_byte_map = _QByteMap()
# In headers spaces are mapped to '_'.
_q_byte_map[ord(' ')] = '_'
def encode_q(bstring):
return ''.join(_q_byte_map[x] for x in bstring)
def len_q(bstring):
return sum(len(_q_byte_map[x]) for x in bstring)
#
# Base64
#
def decode_b(encoded):
defects = []
pad_err = len(encoded) % 4
if pad_err:
defects.append(errors.InvalidBase64PaddingDefect())
padded_encoded = encoded + b'==='[:4-pad_err]
else:
padded_encoded = encoded
try:
return base64.b64decode(padded_encoded, validate=True), defects
except binascii.Error:
# Since we had correct padding, this must an invalid char error.
defects = [errors.InvalidBase64CharactersDefect()]
# The non-alphabet characters are ignored as far as padding
# goes, but we don't know how many there are. So we'll just
# try various padding lengths until something works.
for i in 0, 1, 2, 3:
try:
return base64.b64decode(encoded+b'='*i, validate=False), defects
except binascii.Error:
if i==0:
defects.append(errors.InvalidBase64PaddingDefect())
else:
# This should never happen.
raise AssertionError("unexpected binascii.Error")
def encode_b(bstring):
return base64.b64encode(bstring).decode('ascii')
def len_b(bstring):
groups_of_3, leftover = divmod(len(bstring), 3)
# 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
return groups_of_3 * 4 + (4 if leftover else 0)
_cte_decoders = {
'q': decode_q,
'b': decode_b,
}
def decode(ew):
"""Decode encoded word and return (string, charset, lang, defects) tuple.
An RFC 2047/2243 encoded word has the form:
=?charset*lang?cte?encoded_string?=
where '*lang' may be omitted but the other parts may not be.
This function expects exactly such a string (that is, it does not check the
syntax and may raise errors if the string is not well formed), and returns
the encoded_string decoded first from its Content Transfer Encoding and
then from the resulting bytes into unicode using the specified charset. If
the cte-decoded string does not successfully decode using the specified
character set, a defect is added to the defects list and the unknown octets
are replaced by the unicode 'unknown' character \uFDFF.
The specified charset and language are returned. The default for language,
which is rarely if ever encountered, is the empty string.
"""
_, charset, cte, cte_string, _ = ew.split('?')
charset, _, lang = charset.partition('*')
cte = cte.lower()
# Recover the original bytes and do CTE decoding.
bstring = cte_string.encode('ascii', 'surrogateescape')
bstring, defects = _cte_decoders[cte](bstring)
# Turn the CTE decoded bytes into unicode.
try:
string = bstring.decode(charset)
except UnicodeError:
defects.append(errors.UndecodableBytesDefect("Encoded word "
"contains bytes not decodable using {} charset".format(charset)))
string = bstring.decode(charset, 'surrogateescape')
except LookupError:
string = bstring.decode('ascii', 'surrogateescape')
if charset.lower() != 'unknown-8bit':
defects.append(errors.CharsetError("Unknown charset {} "
"in encoded word; decoded as unknown bytes".format(charset)))
return string, charset, lang, defects
_cte_encoders = {
'q': encode_q,
'b': encode_b,
}
_cte_encode_length = {
'q': len_q,
'b': len_b,
}
def encode(string, charset='utf-8', encoding=None, lang=''):
"""Encode string using the CTE encoding that produces the shorter result.
Produces an RFC 2047/2243 encoded word of the form:
=?charset*lang?cte?encoded_string?=
where '*lang' is omitted unless the 'lang' parameter is given a value.
Optional argument charset (defaults to utf-8) specifies the charset to use
to encode the string to binary before CTE encoding it. Optional argument
'encoding' is the cte specifier for the encoding that should be used ('q'
or 'b'); if it is None (the default) the encoding which produces the
shortest encoded sequence is used, except that 'q' is preferred if it is up
to five characters longer. Optional argument 'lang' (default '') gives the
RFC 2243 language string to specify in the encoded word.
"""
if charset == 'unknown-8bit':
bstring = string.encode('ascii', 'surrogateescape')
else:
bstring = string.encode(charset)
if encoding is None:
qlen = _cte_encode_length['q'](bstring)
blen = _cte_encode_length['b'](bstring)
# Bias toward q. 5 is arbitrary.
encoding = 'q' if qlen - blen < 5 else 'b'
encoded = _cte_encoders[encoding](bstring)
if lang:
lang = '*' + lang
return "=?{}{}?{}?{}?=".format(charset, lang, encoding, encoded)

540
upip/email/_parseaddr.py Normal file
View File

@ -0,0 +1,540 @@
# Copyright (C) 2002-2007 Python Software Foundation
# Contact: email-sig@python.org
"""Email address parsing code.
Lifted directly from rfc822.py. This should eventually be rewritten.
"""
__all__ = [
'mktime_tz',
'parsedate',
'parsedate_tz',
'quote',
]
import time, calendar
SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '
# Parse a date field
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
'aug', 'sep', 'oct', 'nov', 'dec',
'january', 'february', 'march', 'april', 'may', 'june', 'july',
'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
# The timezone table does not include the military time zones defined
# in RFC822, other than Z. According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones. RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.
_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
'EST': -500, 'EDT': -400, # Eastern
'CST': -600, 'CDT': -500, # Central
'MST': -700, 'MDT': -600, # Mountain
'PST': -800, 'PDT': -700 # Pacific
}
def parsedate_tz(data):
"""Convert a date string to a time tuple.
Accounts for military timezones.
"""
res = _parsedate_tz(data)
if not res:
return
if res[9] is None:
res[9] = 0
return tuple(res)
def _parsedate_tz(data):
"""Convert date to extended time tuple.
The last (additional) element is the time zone offset in seconds, except if
the timezone was specified as -0000. In that case the last element is
None. This indicates a UTC timestamp that explicitly declaims knowledge of
the source timezone, as opposed to a +0000 timestamp that indicates the
source timezone really was UTC.
"""
if not data:
return
data = data.split()
# The FWS after the comma after the day-of-week is optional, so search and
# adjust for this.
if data[0].endswith(',') or data[0].lower() in _daynames:
# There's a dayname here. Skip it
del data[0]
else:
i = data[0].rfind(',')
if i >= 0:
data[0] = data[0][i+1:]
if len(data) == 3: # RFC 850 date, deprecated
stuff = data[0].split('-')
if len(stuff) == 3:
data = stuff + data[1:]
if len(data) == 4:
s = data[3]
i = s.find('+')
if i == -1:
i = s.find('-')
if i > 0:
data[3:] = [s[:i], s[i:]]
else:
data.append('') # Dummy tz
if len(data) < 5:
return None
data = data[:5]
[dd, mm, yy, tm, tz] = data
mm = mm.lower()
if mm not in _monthnames:
dd, mm = mm, dd.lower()
if mm not in _monthnames:
return None
mm = _monthnames.index(mm) + 1
if mm > 12:
mm -= 12
if dd[-1] == ',':
dd = dd[:-1]
i = yy.find(':')
if i > 0:
yy, tm = tm, yy
if yy[-1] == ',':
yy = yy[:-1]
if not yy[0].isdigit():
yy, tz = tz, yy
if tm[-1] == ',':
tm = tm[:-1]
tm = tm.split(':')
if len(tm) == 2:
[thh, tmm] = tm
tss = '0'
elif len(tm) == 3:
[thh, tmm, tss] = tm
elif len(tm) == 1 and '.' in tm[0]:
# Some non-compliant MUAs use '.' to separate time elements.
tm = tm[0].split('.')
if len(tm) == 2:
[thh, tmm] = tm
tss = 0
elif len(tm) == 3:
[thh, tmm, tss] = tm
else:
return None
try:
yy = int(yy)
dd = int(dd)
thh = int(thh)
tmm = int(tmm)
tss = int(tss)
except ValueError:
return None
# Check for a yy specified in two-digit format, then convert it to the
# appropriate four-digit format, according to the POSIX standard. RFC 822
# calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
# mandates a 4-digit yy. For more information, see the documentation for
# the time module.
if yy < 100:
# The year is between 1969 and 1999 (inclusive).
if yy > 68:
yy += 1900
# The year is between 2000 and 2068 (inclusive).
else:
yy += 2000
tzoffset = None
tz = tz.upper()
if tz in _timezones:
tzoffset = _timezones[tz]
else:
try:
tzoffset = int(tz)
except ValueError:
pass
if tzoffset==0 and tz.startswith('-'):
tzoffset = None
# Convert a timezone offset into seconds ; -0500 -> -18000
if tzoffset:
if tzoffset < 0:
tzsign = -1
tzoffset = -tzoffset
else:
tzsign = 1
tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
# Daylight Saving Time flag is set to -1, since DST is unknown.
return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
def parsedate(data):
"""Convert a time string to a time tuple."""
t = parsedate_tz(data)
if isinstance(t, tuple):
return t[:9]
else:
return t
def mktime_tz(data):
"""Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp."""
if data[9] is None:
# No zone info, so localtime is better assumption than GMT
return time.mktime(data[:8] + (-1,))
else:
t = calendar.timegm(data)
return t - data[9]
def quote(str):
"""Prepare string to be used in a quoted string.
Turns backslash and double quote characters into quoted pairs. These
are the only characters that need to be quoted inside a quoted string.
Does not add the surrounding double quotes.
"""
return str.replace('\\', '\\\\').replace('"', '\\"')
class AddrlistClass:
"""Address parser class by Ben Escoto.
To understand what this class does, it helps to have a copy of RFC 2822 in
front of you.
Note: this class interface is deprecated and may be removed in the future.
Use email.utils.AddressList instead.
"""
def __init__(self, field):
"""Initialize a new instance.
`field' is an unparsed address header field, containing
one or more addresses.
"""
self.specials = '()<>@,:;.\"[]'
self.pos = 0
self.LWS = ' \t'
self.CR = '\r\n'
self.FWS = self.LWS + self.CR
self.atomends = self.specials + self.LWS + self.CR
# Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
# is obsolete syntax. RFC 2822 requires that we recognize obsolete
# syntax, so allow dots in phrases.
self.phraseends = self.atomends.replace('.', '')
self.field = field
self.commentlist = []
def gotonext(self):
"""Skip white space and extract comments."""
wslist = []
while self.pos < len(self.field):
if self.field[self.pos] in self.LWS + '\n\r':
if self.field[self.pos] not in '\n\r':
wslist.append(self.field[self.pos])
self.pos += 1
elif self.field[self.pos] == '(':
self.commentlist.append(self.getcomment())
else:
break
return EMPTYSTRING.join(wslist)
def getaddrlist(self):
"""Parse all addresses.
Returns a list containing all of the addresses.
"""
result = []
while self.pos < len(self.field):
ad = self.getaddress()
if ad:
result += ad
else:
result.append(('', ''))
return result
def getaddress(self):
"""Parse the next address."""
self.commentlist = []
self.gotonext()
oldpos = self.pos
oldcl = self.commentlist
plist = self.getphraselist()
self.gotonext()
returnlist = []
if self.pos >= len(self.field):
# Bad email address technically, no domain.
if plist:
returnlist = [(SPACE.join(self.commentlist), plist[0])]
elif self.field[self.pos] in '.@':
# email address is just an addrspec
# this isn't very efficient since we start over
self.pos = oldpos
self.commentlist = oldcl
addrspec = self.getaddrspec()
returnlist = [(SPACE.join(self.commentlist), addrspec)]
elif self.field[self.pos] == ':':
# address is a group
returnlist = []
fieldlen = len(self.field)
self.pos += 1
while self.pos < len(self.field):
self.gotonext()
if self.pos < fieldlen and self.field[self.pos] == ';':
self.pos += 1
break
returnlist = returnlist + self.getaddress()
elif self.field[self.pos] == '<':
# Address is a phrase then a route addr
routeaddr = self.getrouteaddr()
if self.commentlist:
returnlist = [(SPACE.join(plist) + ' (' +
' '.join(self.commentlist) + ')', routeaddr)]
else:
returnlist = [(SPACE.join(plist), routeaddr)]
else:
if plist:
returnlist = [(SPACE.join(self.commentlist), plist[0])]
elif self.field[self.pos] in self.specials:
self.pos += 1
self.gotonext()
if self.pos < len(self.field) and self.field[self.pos] == ',':
self.pos += 1
return returnlist
def getrouteaddr(self):
"""Parse a route address (Return-path value).
This method just skips all the route stuff and returns the addrspec.
"""
if self.field[self.pos] != '<':
return
expectroute = False
self.pos += 1
self.gotonext()
adlist = ''
while self.pos < len(self.field):
if expectroute:
self.getdomain()
expectroute = False
elif self.field[self.pos] == '>':
self.pos += 1
break
elif self.field[self.pos] == '@':
self.pos += 1
expectroute = True
elif self.field[self.pos] == ':':
self.pos += 1
else:
adlist = self.getaddrspec()
self.pos += 1
break
self.gotonext()
return adlist
def getaddrspec(self):
"""Parse an RFC 2822 addr-spec."""
aslist = []
self.gotonext()
while self.pos < len(self.field):
preserve_ws = True
if self.field[self.pos] == '.':
if aslist and not aslist[-1].strip():
aslist.pop()
aslist.append('.')
self.pos += 1
preserve_ws = False
elif self.field[self.pos] == '"':
aslist.append('"%s"' % quote(self.getquote()))
elif self.field[self.pos] in self.atomends:
if aslist and not aslist[-1].strip():
aslist.pop()
break
else:
aslist.append(self.getatom())
ws = self.gotonext()
if preserve_ws and ws:
aslist.append(ws)
if self.pos >= len(self.field) or self.field[self.pos] != '@':
return EMPTYSTRING.join(aslist)
aslist.append('@')
self.pos += 1
self.gotonext()
return EMPTYSTRING.join(aslist) + self.getdomain()
def getdomain(self):
"""Get the complete domain name from an address."""
sdlist = []
while self.pos < len(self.field):
if self.field[self.pos] in self.LWS:
self.pos += 1
elif self.field[self.pos] == '(':
self.commentlist.append(self.getcomment())
elif self.field[self.pos] == '[':
sdlist.append(self.getdomainliteral())
elif self.field[self.pos] == '.':
self.pos += 1
sdlist.append('.')
elif self.field[self.pos] in self.atomends:
break
else:
sdlist.append(self.getatom())
return EMPTYSTRING.join(sdlist)
def getdelimited(self, beginchar, endchars, allowcomments=True):
"""Parse a header fragment delimited by special characters.
`beginchar' is the start character for the fragment.
If self is not looking at an instance of `beginchar' then
getdelimited returns the empty string.
`endchars' is a sequence of allowable end-delimiting characters.
Parsing stops when one of these is encountered.
If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
within the parsed fragment.
"""
if self.field[self.pos] != beginchar:
return ''
slist = ['']
quote = False
self.pos += 1
while self.pos < len(self.field):
if quote:
slist.append(self.field[self.pos])
quote = False
elif self.field[self.pos] in endchars:
self.pos += 1
break
elif allowcomments and self.field[self.pos] == '(':
slist.append(self.getcomment())
continue # have already advanced pos from getcomment
elif self.field[self.pos] == '\\':
quote = True
else:
slist.append(self.field[self.pos])
self.pos += 1
return EMPTYSTRING.join(slist)
def getquote(self):
"""Get a quote-delimited fragment from self's field."""
return self.getdelimited('"', '"\r', False)
def getcomment(self):
"""Get a parenthesis-delimited fragment from self's field."""
return self.getdelimited('(', ')\r', True)
def getdomainliteral(self):
"""Parse an RFC 2822 domain-literal."""
return '[%s]' % self.getdelimited('[', ']\r', False)
def getatom(self, atomends=None):
"""Parse an RFC 2822 atom.
Optional atomends specifies a different set of end token delimiters
(the default is to use self.atomends). This is used e.g. in
getphraselist() since phrase endings must not include the `.' (which
is legal in phrases)."""
atomlist = ['']
if atomends is None:
atomends = self.atomends
while self.pos < len(self.field):
if self.field[self.pos] in atomends:
break
else:
atomlist.append(self.field[self.pos])
self.pos += 1
return EMPTYSTRING.join(atomlist)
def getphraselist(self):
"""Parse a sequence of RFC 2822 phrases.
A phrase is a sequence of words, which are in turn either RFC 2822
atoms or quoted-strings. Phrases are canonicalized by squeezing all
runs of continuous whitespace into one space.
"""
plist = []
while self.pos < len(self.field):
if self.field[self.pos] in self.FWS:
self.pos += 1
elif self.field[self.pos] == '"':
plist.append(self.getquote())
elif self.field[self.pos] == '(':
self.commentlist.append(self.getcomment())
elif self.field[self.pos] in self.phraseends:
break
else:
plist.append(self.getatom(self.phraseends))
return plist
class AddressList(AddrlistClass):
"""An AddressList encapsulates a list of parsed RFC 2822 addresses."""
def __init__(self, field):
AddrlistClass.__init__(self, field)
if field:
self.addresslist = self.getaddrlist()
else:
self.addresslist = []
def __len__(self):
return len(self.addresslist)
def __add__(self, other):
# Set union
newaddr = AddressList(None)
newaddr.addresslist = self.addresslist[:]
for x in other.addresslist:
if not x in self.addresslist:
newaddr.addresslist.append(x)
return newaddr
def __iadd__(self, other):
# Set union, in-place
for x in other.addresslist:
if not x in self.addresslist:
self.addresslist.append(x)
return self
def __sub__(self, other):
# Set difference
newaddr = AddressList(None)
for x in self.addresslist:
if not x in other.addresslist:
newaddr.addresslist.append(x)
return newaddr
def __isub__(self, other):
# Set difference, in-place
for x in other.addresslist:
if x in self.addresslist:
self.addresslist.remove(x)
return self
def __getitem__(self, index):
# Make indexing, slices, and 'in' work
return self.addresslist[index]

359
upip/email/_policybase.py Normal file
View File

@ -0,0 +1,359 @@
"""Policy framework for the email package.
Allows fine grained feature control of how the package parses and emits data.
"""
import abc
from email import header
from email import charset as _charset
from email.utils import _has_surrogates
__all__ = [
'Policy',
'Compat32',
'compat32',
]
class _PolicyBase:
"""Policy Object basic framework.
This class is useless unless subclassed. A subclass should define
class attributes with defaults for any values that are to be
managed by the Policy object. The constructor will then allow
non-default values to be set for these attributes at instance
creation time. The instance will be callable, taking these same
attributes keyword arguments, and returning a new instance
identical to the called instance except for those values changed
by the keyword arguments. Instances may be added, yielding new
instances with any non-default values from the right hand
operand overriding those in the left hand operand. That is,
A + B == A(<non-default values of B>)
The repr of an instance can be used to reconstruct the object
if and only if the repr of the values can be used to reconstruct
those values.
"""
def __init__(self, **kw):
"""Create new Policy, possibly overriding some defaults.
See class docstring for a list of overridable attributes.
"""
for name, value in kw.items():
if hasattr(self, name):
super(_PolicyBase,self).__setattr__(name, value)
else:
raise TypeError(
"{!r} is an invalid keyword argument for {}".format(
name, self.__class__.__name__))
def __repr__(self):
args = [ "{}={!r}".format(name, value)
for name, value in self.__dict__.items() ]
return "{}({})".format(self.__class__.__name__, ', '.join(args))
def clone(self, **kw):
"""Return a new instance with specified attributes changed.
The new instance has the same attribute values as the current object,
except for the changes passed in as keyword arguments.
"""
newpolicy = self.__class__.__new__(self.__class__)
for attr, value in self.__dict__.items():
object.__setattr__(newpolicy, attr, value)
for attr, value in kw.items():
if not hasattr(self, attr):
raise TypeError(
"{!r} is an invalid keyword argument for {}".format(
attr, self.__class__.__name__))
object.__setattr__(newpolicy, attr, value)
return newpolicy
def __setattr__(self, name, value):
if hasattr(self, name):
msg = "{!r} object attribute {!r} is read-only"
else:
msg = "{!r} object has no attribute {!r}"
raise AttributeError(msg.format(self.__class__.__name__, name))
def __add__(self, other):
"""Non-default values from right operand override those from left.
The object returned is a new instance of the subclass.
"""
return self.clone(**other.__dict__)
def _append_doc(doc, added_doc):
doc = doc.rsplit('\n', 1)[0]
added_doc = added_doc.split('\n', 1)[1]
return doc + '\n' + added_doc
def _extend_docstrings(cls):
return cls
if cls.__doc__ and cls.__doc__.startswith('+'):
cls.__doc__ = _append_doc(cls.__bases__[0].__doc__, cls.__doc__)
for name, attr in cls.__dict__.items():
if attr.__doc__ and attr.__doc__.startswith('+'):
for c in (c for base in cls.__bases__ for c in base.mro()):
doc = getattr(getattr(c, name), '__doc__')
if doc:
attr.__doc__ = _append_doc(doc, attr.__doc__)
break
return cls
class Policy(_PolicyBase):#, metaclass=abc.ABCMeta):
r"""Controls for how messages are interpreted and formatted.
Most of the classes and many of the methods in the email package accept
Policy objects as parameters. A Policy object contains a set of values and
functions that control how input is interpreted and how output is rendered.
For example, the parameter 'raise_on_defect' controls whether or not an RFC
violation results in an error being raised or not, while 'max_line_length'
controls the maximum length of output lines when a Message is serialized.
Any valid attribute may be overridden when a Policy is created by passing
it as a keyword argument to the constructor. Policy objects are immutable,
but a new Policy object can be created with only certain values changed by
calling the Policy instance with keyword arguments. Policy objects can
also be added, producing a new Policy object in which the non-default
attributes set in the right hand operand overwrite those specified in the
left operand.
Settable attributes:
raise_on_defect -- If true, then defects should be raised as errors.
Default: False.
linesep -- string containing the value to use as separation
between output lines. Default '\n'.
cte_type -- Type of allowed content transfer encodings
7bit -- ASCII only
8bit -- Content-Transfer-Encoding: 8bit is allowed
Default: 8bit. Also controls the disposition of
(RFC invalid) binary data in headers; see the
documentation of the binary_fold method.
max_line_length -- maximum length of lines, excluding 'linesep',
during serialization. None or 0 means no line
wrapping is done. Default is 78.
"""
raise_on_defect = False
linesep = '\n'
cte_type = '8bit'
max_line_length = 78
def handle_defect(self, obj, defect):
"""Based on policy, either raise defect or call register_defect.
handle_defect(obj, defect)
defect should be a Defect subclass, but in any case must be an
Exception subclass. obj is the object on which the defect should be
registered if it is not raised. If the raise_on_defect is True, the
defect is raised as an error, otherwise the object and the defect are
passed to register_defect.
This method is intended to be called by parsers that discover defects.
The email package parsers always call it with Defect instances.
"""
if self.raise_on_defect:
raise defect
self.register_defect(obj, defect)
def register_defect(self, obj, defect):
"""Record 'defect' on 'obj'.
Called by handle_defect if raise_on_defect is False. This method is
part of the Policy API so that Policy subclasses can implement custom
defect handling. The default implementation calls the append method of
the defects attribute of obj. The objects used by the email package by
default that get passed to this method will always have a defects
attribute with an append method.
"""
obj.defects.append(defect)
def header_max_count(self, name):
"""Return the maximum allowed number of headers named 'name'.
Called when a header is added to a Message object. If the returned
value is not 0 or None, and there are already a number of headers with
the name 'name' equal to the value returned, a ValueError is raised.
Because the default behavior of Message's __setitem__ is to append the
value to the list of headers, it is easy to create duplicate headers
without realizing it. This method allows certain headers to be limited
in the number of instances of that header that may be added to a
Message programmatically. (The limit is not observed by the parser,
which will faithfully produce as many headers as exist in the message
being parsed.)
The default implementation returns None for all header names.
"""
return None
@abc.abstractmethod
def header_source_parse(self, sourcelines):
"""Given a list of linesep terminated strings constituting the lines of
a single header, return the (name, value) tuple that should be stored
in the model. The input lines should retain their terminating linesep
characters. The lines passed in by the email package may contain
surrogateescaped binary data.
"""
raise NotImplementedError
@abc.abstractmethod
def header_store_parse(self, name, value):
"""Given the header name and the value provided by the application
program, return the (name, value) that should be stored in the model.
"""
raise NotImplementedError
@abc.abstractmethod
def header_fetch_parse(self, name, value):
"""Given the header name and the value from the model, return the value
to be returned to the application program that is requesting that
header. The value passed in by the email package may contain
surrogateescaped binary data if the lines were parsed by a BytesParser.
The returned value should not contain any surrogateescaped data.
"""
raise NotImplementedError
@abc.abstractmethod
def fold(self, name, value):
"""Given the header name and the value from the model, return a string
containing linesep characters that implement the folding of the header
according to the policy controls. The value passed in by the email
package may contain surrogateescaped binary data if the lines were
parsed by a BytesParser. The returned value should not contain any
surrogateescaped data.
"""
raise NotImplementedError
@abc.abstractmethod
def fold_binary(self, name, value):
"""Given the header name and the value from the model, return binary
data containing linesep characters that implement the folding of the
header according to the policy controls. The value passed in by the
email package may contain surrogateescaped binary data.
"""
raise NotImplementedError
@_extend_docstrings
class Compat32(Policy):
"""+
This particular policy is the backward compatibility Policy. It
replicates the behavior of the email package version 5.1.
"""
def _sanitize_header(self, name, value):
# If the header value contains surrogates, return a Header using
# the unknown-8bit charset to encode the bytes as encoded words.
if not isinstance(value, str):
# Assume it is already a header object
return value
if _has_surrogates(value):
return header.Header(value, charset=_charset.UNKNOWN8BIT,
header_name=name)
else:
return value
def header_source_parse(self, sourcelines):
"""+
The name is parsed as everything up to the ':' and returned unmodified.
The value is determined by stripping leading whitespace off the
remainder of the first line, joining all subsequent lines together, and
stripping any trailing carriage return or linefeed characters.
"""
name, value = sourcelines[0].split(':', 1)
value = value.lstrip(' \t') + ''.join(sourcelines[1:])
return (name, value.rstrip('\r\n'))
def header_store_parse(self, name, value):
"""+
The name and value are returned unmodified.
"""
return (name, value)
def header_fetch_parse(self, name, value):
"""+
If the value contains binary data, it is converted into a Header object
using the unknown-8bit charset. Otherwise it is returned unmodified.
"""
return self._sanitize_header(name, value)
def fold(self, name, value):
"""+
Headers are folded using the Header folding algorithm, which preserves
existing line breaks in the value, and wraps each resulting line to the
max_line_length. Non-ASCII binary data are CTE encoded using the
unknown-8bit charset.
"""
return self._fold(name, value, sanitize=True)
def fold_binary(self, name, value):
"""+
Headers are folded using the Header folding algorithm, which preserves
existing line breaks in the value, and wraps each resulting line to the
max_line_length. If cte_type is 7bit, non-ascii binary data is CTE
encoded using the unknown-8bit charset. Otherwise the original source
header is used, with its existing line breaks and/or binary data.
"""
folded = self._fold(name, value, sanitize=self.cte_type=='7bit')
return folded.encode('ascii', 'surrogateescape')
def _fold(self, name, value, sanitize):
parts = []
parts.append('%s: ' % name)
if isinstance(value, str):
if _has_surrogates(value):
if sanitize:
h = header.Header(value,
charset=_charset.UNKNOWN8BIT,
header_name=name)
else:
# If we have raw 8bit data in a byte string, we have no idea
# what the encoding is. There is no safe way to split this
# string. If it's ascii-subset, then we could do a normal
# ascii split, but if it's multibyte then we could break the
# string. There's no way to know so the least harm seems to
# be to not split the string and risk it being too long.
parts.append(value)
h = None
else:
h = header.Header(value, header_name=name)
else:
# Assume it is a Header-like object.
h = value
if h is not None:
parts.append(h.encode(linesep=self.linesep,
maxlinelen=self.max_line_length))
parts.append(self.linesep)
return ''.join(parts)
compat32 = Compat32()

119
upip/email/base64mime.py Normal file
View File

@ -0,0 +1,119 @@
# Copyright (C) 2002-2007 Python Software Foundation
# Author: Ben Gertzfield
# Contact: email-sig@python.org
"""Base64 content transfer encoding per RFCs 2045-2047.
This module handles the content transfer encoding method defined in RFC 2045
to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
characters encoding known as Base64.
It is used in the MIME standards for email to attach images, audio, and text
using some 8-bit character sets to messages.
This module provides an interface to encode and decode both headers and bodies
with Base64 encoding.
RFC 2045 defines a method for including character set information in an
`encoded-word' in a header. This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.
This module does not do the line wrapping or end-of-line character conversion
necessary for proper internationalized headers; it only does dumb encoding and
decoding. To deal with the various line wrapping issues, use the email.header
module.
"""
__all__ = [
'body_decode',
'body_encode',
'decode',
'decodestring',
'header_encode',
'header_length',
]
from base64 import b64encode
from binascii import b2a_base64, a2b_base64
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''
# See also Charset.py
MISC_LEN = 7
# Helpers
def header_length(bytearray):
"""Return the length of s when it is encoded with base64."""
groups_of_3, leftover = divmod(len(bytearray), 3)
# 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
n = groups_of_3 * 4
if leftover:
n += 4
return n
def header_encode(header_bytes, charset='iso-8859-1'):
"""Encode a single header line with Base64 encoding in a given charset.
charset names the character set to use to encode the header. It defaults
to iso-8859-1. Base64 encoding is defined in RFC 2045.
"""
if not header_bytes:
return ""
if isinstance(header_bytes, str):
header_bytes = header_bytes.encode(charset)
encoded = b64encode(header_bytes).decode("ascii")
return '=?%s?b?%s?=' % (charset, encoded)
def body_encode(s, maxlinelen=76, eol=NL):
r"""Encode a string with base64.
Each line will be wrapped at, at most, maxlinelen characters (defaults to
76 characters).
Each line of encoded text will end with eol, which defaults to "\n". Set
this to "\r\n" if you will be using the result of this function directly
in an email.
"""
if not s:
return s
encvec = []
max_unencoded = maxlinelen * 3 // 4
for i in range(0, len(s), max_unencoded):
# BAW: should encode() inherit b2a_base64()'s dubious behavior in
# adding a newline to the encoded string?
enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii")
if enc.endswith(NL) and eol != NL:
enc = enc[:-1] + eol
encvec.append(enc)
return EMPTYSTRING.join(encvec)
def decode(string):
"""Decode a raw base64 string, returning a bytes object.
This function does not parse a full MIME header value encoded with
base64 (like =?iso-8895-1?b?bmloISBuaWgh?=) -- please use the high
level email.header class for that functionality.
"""
if not string:
return bytes()
elif isinstance(string, str):
return a2b_base64(string.encode('raw-unicode-escape'))
else:
return a2b_base64(string)
# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode

412
upip/email/charset.py Normal file
View File

@ -0,0 +1,412 @@
# Copyright (C) 2001-2007 Python Software Foundation
# Author: Ben Gertzfield, Barry Warsaw
# Contact: email-sig@python.org
__all__ = [
'Charset',
'add_alias',
'add_charset',
'add_codec',
]
from functools import partial
import email.base64mime
import email.quoprimime
from email import errors
from email.encoders import encode_7or8bit
# Flags for types of header encodings
QP = 1 # Quoted-Printable
BASE64 = 2 # Base64
SHORTEST = 3 # the shorter of QP and base64, but only for headers
# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7
RFC2047_CHROME_LEN = 7
DEFAULT_CHARSET = 'us-ascii'
UNKNOWN8BIT = 'unknown-8bit'
EMPTYSTRING = ''
# Defaults
CHARSETS = {
# input header enc body enc output conv
'iso-8859-1': (QP, QP, None),
'iso-8859-2': (QP, QP, None),
'iso-8859-3': (QP, QP, None),
'iso-8859-4': (QP, QP, None),
# iso-8859-5 is Cyrillic, and not especially used
# iso-8859-6 is Arabic, also not particularly used
# iso-8859-7 is Greek, QP will not make it readable
# iso-8859-8 is Hebrew, QP will not make it readable
'iso-8859-9': (QP, QP, None),
'iso-8859-10': (QP, QP, None),
# iso-8859-11 is Thai, QP will not make it readable
'iso-8859-13': (QP, QP, None),
'iso-8859-14': (QP, QP, None),
'iso-8859-15': (QP, QP, None),
'iso-8859-16': (QP, QP, None),
'windows-1252':(QP, QP, None),
'viscii': (QP, QP, None),
'us-ascii': (None, None, None),
'big5': (BASE64, BASE64, None),
'gb2312': (BASE64, BASE64, None),
'euc-jp': (BASE64, None, 'iso-2022-jp'),
'shift_jis': (BASE64, None, 'iso-2022-jp'),
'iso-2022-jp': (BASE64, None, None),
'koi8-r': (BASE64, BASE64, None),
'utf-8': (SHORTEST, BASE64, 'utf-8'),
}
# Aliases for other commonly-used names for character sets. Map
# them to the real ones used in email.
ALIASES = {
'latin_1': 'iso-8859-1',
'latin-1': 'iso-8859-1',
'latin_2': 'iso-8859-2',
'latin-2': 'iso-8859-2',
'latin_3': 'iso-8859-3',
'latin-3': 'iso-8859-3',
'latin_4': 'iso-8859-4',
'latin-4': 'iso-8859-4',
'latin_5': 'iso-8859-9',
'latin-5': 'iso-8859-9',
'latin_6': 'iso-8859-10',
'latin-6': 'iso-8859-10',
'latin_7': 'iso-8859-13',
'latin-7': 'iso-8859-13',
'latin_8': 'iso-8859-14',
'latin-8': 'iso-8859-14',
'latin_9': 'iso-8859-15',
'latin-9': 'iso-8859-15',
'latin_10':'iso-8859-16',
'latin-10':'iso-8859-16',
'cp949': 'ks_c_5601-1987',
'euc_jp': 'euc-jp',
'euc_kr': 'euc-kr',
'ascii': 'us-ascii',
}
# Map charsets to their Unicode codec strings.
CODEC_MAP = {
'gb2312': 'eucgb2312_cn',
'big5': 'big5_tw',
# Hack: We don't want *any* conversion for stuff marked us-ascii, as all
# sorts of garbage might be sent to us in the guise of 7-bit us-ascii.
# Let that stuff pass through without conversion to/from Unicode.
'us-ascii': None,
}
# Convenience functions for extending the above mappings
def add_charset(charset, header_enc=None, body_enc=None, output_charset=None):
"""Add character set properties to the global registry.
charset is the input character set, and must be the canonical name of a
character set.
Optional header_enc and body_enc is either Charset.QP for
quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for
the shortest of qp or base64 encoding, or None for no encoding. SHORTEST
is only valid for header_enc. It describes how message headers and
message bodies in the input charset are to be encoded. Default is no
encoding.
Optional output_charset is the character set that the output should be
in. Conversions will proceed from input charset, to Unicode, to the
output charset when the method Charset.convert() is called. The default
is to output in the same character set as the input.
Both input_charset and output_charset must have Unicode codec entries in
the module's charset-to-codec mapping; use add_codec(charset, codecname)
to add codecs the module does not know about. See the codecs module's
documentation for more information.
"""
if body_enc == SHORTEST:
raise ValueError('SHORTEST not allowed for body_enc')
CHARSETS[charset] = (header_enc, body_enc, output_charset)
def add_alias(alias, canonical):
"""Add a character set alias.
alias is the alias name, e.g. latin-1
canonical is the character set's canonical name, e.g. iso-8859-1
"""
ALIASES[alias] = canonical
def add_codec(charset, codecname):
"""Add a codec that map characters in the given charset to/from Unicode.
charset is the canonical name of a character set. codecname is the name
of a Python codec, as appropriate for the second argument to the unicode()
built-in, or to the encode() method of a Unicode string.
"""
CODEC_MAP[charset] = codecname
# Convenience function for encoding strings, taking into account
# that they might be unknown-8bit (ie: have surrogate-escaped bytes)
def _encode(string, codec):
if codec == UNKNOWN8BIT:
return string.encode('ascii', 'surrogateescape')
else:
return string.encode(codec)
class Charset:
"""Map character sets to their email properties.
This class provides information about the requirements imposed on email
for a specific character set. It also provides convenience routines for
converting between character sets, given the availability of the
applicable codecs. Given a character set, it will do its best to provide
information on how to use that character set in an email in an
RFC-compliant way.
Certain character sets must be encoded with quoted-printable or base64
when used in email headers or bodies. Certain character sets must be
converted outright, and are not allowed in email. Instances of this
module expose the following information about a character set:
input_charset: The initial character set specified. Common aliases
are converted to their `official' email names (e.g. latin_1
is converted to iso-8859-1). Defaults to 7-bit us-ascii.
header_encoding: If the character set must be encoded before it can be
used in an email header, this attribute will be set to
Charset.QP (for quoted-printable), Charset.BASE64 (for
base64 encoding), or Charset.SHORTEST for the shortest of
QP or BASE64 encoding. Otherwise, it will be None.
body_encoding: Same as header_encoding, but describes the encoding for the
mail message's body, which indeed may be different than the
header encoding. Charset.SHORTEST is not allowed for
body_encoding.
output_charset: Some character sets must be converted before they can be
used in email headers or bodies. If the input_charset is
one of them, this attribute will contain the name of the
charset output will be converted to. Otherwise, it will
be None.
input_codec: The name of the Python codec used to convert the
input_charset to Unicode. If no conversion codec is
necessary, this attribute will be None.
output_codec: The name of the Python codec used to convert Unicode
to the output_charset. If no conversion codec is necessary,
this attribute will have the same value as the input_codec.
"""
def __init__(self, input_charset=DEFAULT_CHARSET):
# RFC 2046, $4.1.2 says charsets are not case sensitive. We coerce to
# unicode because its .lower() is locale insensitive. If the argument
# is already a unicode, we leave it at that, but ensure that the
# charset is ASCII, as the standard (RFC XXX) requires.
try:
if isinstance(input_charset, str):
input_charset.encode('ascii')
else:
input_charset = str(input_charset, 'ascii')
except UnicodeError:
raise errors.CharsetError(input_charset)
input_charset = input_charset.lower()
# Set the input charset after filtering through the aliases
self.input_charset = ALIASES.get(input_charset, input_charset)
# We can try to guess which encoding and conversion to use by the
# charset_map dictionary. Try that first, but let the user override
# it.
henc, benc, conv = CHARSETS.get(self.input_charset,
(SHORTEST, BASE64, None))
if not conv:
conv = self.input_charset
# Set the attributes, allowing the arguments to override the default.
self.header_encoding = henc
self.body_encoding = benc
self.output_charset = ALIASES.get(conv, conv)
# Now set the codecs. If one isn't defined for input_charset,
# guess and try a Unicode codec with the same name as input_codec.
self.input_codec = CODEC_MAP.get(self.input_charset,
self.input_charset)
self.output_codec = CODEC_MAP.get(self.output_charset,
self.output_charset)
def __str__(self):
return self.input_charset.lower()
__repr__ = __str__
def __eq__(self, other):
return str(self) == str(other).lower()
def __ne__(self, other):
return not self.__eq__(other)
def get_body_encoding(self):
"""Return the content-transfer-encoding used for body encoding.
This is either the string `quoted-printable' or `base64' depending on
the encoding used, or it is a function in which case you should call
the function with a single argument, the Message object being
encoded. The function should then set the Content-Transfer-Encoding
header itself to whatever is appropriate.
Returns "quoted-printable" if self.body_encoding is QP.
Returns "base64" if self.body_encoding is BASE64.
Returns conversion function otherwise.
"""
assert self.body_encoding != SHORTEST
if self.body_encoding == QP:
return 'quoted-printable'
elif self.body_encoding == BASE64:
return 'base64'
else:
return encode_7or8bit
def get_output_charset(self):
"""Return the output character set.
This is self.output_charset if that is not None, otherwise it is
self.input_charset.
"""
return self.output_charset or self.input_charset
def header_encode(self, string):
"""Header-encode a string by converting it first to bytes.
The type of encoding (base64 or quoted-printable) will be based on
this charset's `header_encoding`.
:param string: A unicode string for the header. It must be possible
to encode this string to bytes using the character set's
output codec.
:return: The encoded string, with RFC 2047 chrome.
"""
codec = self.output_codec or 'us-ascii'
header_bytes = _encode(string, codec)
# 7bit/8bit encodings return the string unchanged (modulo conversions)
encoder_module = self._get_encoder(header_bytes)
if encoder_module is None:
return string
return encoder_module.header_encode(header_bytes, codec)
def header_encode_lines(self, string, maxlengths):
"""Header-encode a string by converting it first to bytes.
This is similar to `header_encode()` except that the string is fit
into maximum line lengths as given by the argument.
:param string: A unicode string for the header. It must be possible
to encode this string to bytes using the character set's
output codec.
:param maxlengths: Maximum line length iterator. Each element
returned from this iterator will provide the next maximum line
length. This parameter is used as an argument to built-in next()
and should never be exhausted. The maximum line lengths should
not count the RFC 2047 chrome. These line lengths are only a
hint; the splitter does the best it can.
:return: Lines of encoded strings, each with RFC 2047 chrome.
"""
# See which encoding we should use.
codec = self.output_codec or 'us-ascii'
header_bytes = _encode(string, codec)
encoder_module = self._get_encoder(header_bytes)
encoder = partial(encoder_module.header_encode, charset=codec)
# Calculate the number of characters that the RFC 2047 chrome will
# contribute to each line.
charset = self.get_output_charset()
extra = len(charset) + RFC2047_CHROME_LEN
# Now comes the hard part. We must encode bytes but we can't split on
# bytes because some character sets are variable length and each
# encoded word must stand on its own. So the problem is you have to
# encode to bytes to figure out this word's length, but you must split
# on characters. This causes two problems: first, we don't know how
# many octets a specific substring of unicode characters will get
# encoded to, and second, we don't know how many ASCII characters
# those octets will get encoded to. Unless we try it. Which seems
# inefficient. In the interest of being correct rather than fast (and
# in the hope that there will be few encoded headers in any such
# message), brute force it. :(
lines = []
current_line = []
maxlen = next(maxlengths) - extra
for character in string:
current_line.append(character)
this_line = EMPTYSTRING.join(current_line)
length = encoder_module.header_length(_encode(this_line, charset))
if length > maxlen:
# This last character doesn't fit so pop it off.
current_line.pop()
# Does nothing fit on the first line?
if not lines and not current_line:
lines.append(None)
else:
separator = (' ' if lines else '')
joined_line = EMPTYSTRING.join(current_line)
header_bytes = _encode(joined_line, codec)
lines.append(encoder(header_bytes))
current_line = [character]
maxlen = next(maxlengths) - extra
joined_line = EMPTYSTRING.join(current_line)
header_bytes = _encode(joined_line, codec)
lines.append(encoder(header_bytes))
return lines
def _get_encoder(self, header_bytes):
if self.header_encoding == BASE64:
return email.base64mime
elif self.header_encoding == QP:
return email.quoprimime
elif self.header_encoding == SHORTEST:
len64 = email.base64mime.header_length(header_bytes)
lenqp = email.quoprimime.header_length(header_bytes)
if len64 < lenqp:
return email.base64mime
else:
return email.quoprimime
else:
return None
def body_encode(self, string):
"""Body-encode a string by converting it first to bytes.
The type of encoding (base64 or quoted-printable) will be based on
self.body_encoding. If body_encoding is None, we assume the
output charset is a 7bit encoding, so re-encoding the decoded
string using the ascii codec produces the correct string version
of the content.
"""
# 7bit/8bit encodings return the string unchanged (module conversions)
if self.body_encoding is BASE64:
if isinstance(string, str):
string = string.encode(self.output_charset)
return email.base64mime.body_encode(string)
elif self.body_encoding is QP:
# quopromime.body_encode takes a string, but operates on it as if
# it were a list of byte codes. For a (minimal) history on why
# this is so, see changeset 0cf700464177. To correctly encode a
# character set, then, we must turn it into pseudo bytes via the
# latin1 charset, which will encode any byte as a single code point
# between 0 and 255, which is what body_encode is expecting.
#
# Note that this clause doesn't handle the case of a _payload that
# is already bytes. It never did, and the semantics of _payload
# being bytes has never been nailed down, so fixing that is a
# longer term TODO.
if isinstance(string, str):
string = string.encode(self.output_charset).decode('latin1')
return email.quoprimime.body_encode(string)
else:
if isinstance(string, str):
string = string.encode(self.output_charset).decode('ascii')
return string

78
upip/email/encoders.py Normal file
View File

@ -0,0 +1,78 @@
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org
"""Encodings and related functions."""
__all__ = [
'encode_7or8bit',
'encode_base64',
'encode_noop',
'encode_quopri',
]
from base64 import encodebytes as _bencode
from quopri import encodestring as _encodestring
def _qencode(s):
enc = _encodestring(s, quotetabs=True)
# Must encode spaces, which quopri.encodestring() doesn't do
return enc.replace(b' ', b'=20')
def encode_base64(msg):
"""Encode the message's payload in Base64.
Also, add an appropriate Content-Transfer-Encoding header.
"""
orig = msg.get_payload(decode=True)
encdata = str(_bencode(orig), 'ascii')
msg.set_payload(encdata)
msg['Content-Transfer-Encoding'] = 'base64'
def encode_quopri(msg):
"""Encode the message's payload in quoted-printable.
Also, add an appropriate Content-Transfer-Encoding header.
"""
orig = msg.get_payload(decode=True)
encdata = _qencode(orig)
msg.set_payload(encdata)
msg['Content-Transfer-Encoding'] = 'quoted-printable'
def encode_7or8bit(msg):
"""Set the Content-Transfer-Encoding header to 7bit or 8bit."""
orig = msg.get_payload(decode=True)
if orig is None:
# There's no payload. For backwards compatibility we use 7bit
msg['Content-Transfer-Encoding'] = '7bit'
return
# We play a trick to make this go fast. If encoding/decode to ASCII
# succeeds, we know the data must be 7bit, otherwise treat it as 8bit.
try:
if isinstance(orig, str):
orig.encode('ascii')
else:
orig.decode('ascii')
except UnicodeError:
charset = msg.get_charset()
output_cset = charset and charset.output_charset
# iso-2022-* is non-ASCII but encodes to a 7-bit representation
if output_cset and output_cset.lower().startswith('iso-2022-'):
msg['Content-Transfer-Encoding'] = '7bit'
else:
msg['Content-Transfer-Encoding'] = '8bit'
else:
msg['Content-Transfer-Encoding'] = '7bit'
def encode_noop(msg):
"""Do nothing."""

107
upip/email/errors.py Normal file
View File

@ -0,0 +1,107 @@
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org
"""email package exception classes."""
class MessageError(Exception):
"""Base class for errors in the email package."""
class MessageParseError(MessageError):
"""Base class for message parsing errors."""
class HeaderParseError(MessageParseError):
"""Error while parsing headers."""
class BoundaryError(MessageParseError):
"""Couldn't find terminating boundary."""
class MultipartConversionError(MessageError):#, TypeError):
"""Conversion to a multipart is prohibited."""
class CharsetError(MessageError):
"""An illegal charset was given."""
# These are parsing defects which the parser was able to work around.
class MessageDefect(ValueError):
"""Base class for a message defect."""
def __init__(self, line=None):
if line is not None:
super().__init__(line)
self.line = line
class NoBoundaryInMultipartDefect(MessageDefect):
"""A message claimed to be a multipart but had no boundary parameter."""
class StartBoundaryNotFoundDefect(MessageDefect):
"""The claimed start boundary was never found."""
class CloseBoundaryNotFoundDefect(MessageDefect):
"""A start boundary was found, but not the corresponding close boundary."""
class FirstHeaderLineIsContinuationDefect(MessageDefect):
"""A message had a continuation line as its first header line."""
class MisplacedEnvelopeHeaderDefect(MessageDefect):
"""A 'Unix-from' header was found in the middle of a header block."""
class MissingHeaderBodySeparatorDefect(MessageDefect):
"""Found line with no leading whitespace and no colon before blank line."""
# XXX: backward compatibility, just in case (it was never emitted).
MalformedHeaderDefect = MissingHeaderBodySeparatorDefect
class MultipartInvariantViolationDefect(MessageDefect):
"""A message claimed to be a multipart but no subparts were found."""
class InvalidMultipartContentTransferEncodingDefect(MessageDefect):
"""An invalid content transfer encoding was set on the multipart itself."""
class UndecodableBytesDefect(MessageDefect):
"""Header contained bytes that could not be decoded"""
class InvalidBase64PaddingDefect(MessageDefect):
"""base64 encoded sequence had an incorrect length"""
class InvalidBase64CharactersDefect(MessageDefect):
"""base64 encoded sequence had characters not in base64 alphabet"""
# These errors are specific to header parsing.
class HeaderDefect(MessageDefect):
"""Base class for a header defect."""
def __init__(self, *args, **kw):
super().__init__(*args, **kw)
class InvalidHeaderDefect(HeaderDefect):
"""Header is not valid, message gives details."""
class HeaderMissingRequiredValue(HeaderDefect):
"""A header that must have a value had none"""
class NonPrintableDefect(HeaderDefect):
"""ASCII characters outside the ascii-printable range found"""
def __init__(self, non_printables):
super().__init__(non_printables)
self.non_printables = non_printables
def __str__(self):
return ("the following ASCII non-printables found in header: "
"{}".format(self.non_printables))
class ObsoleteHeaderDefect(HeaderDefect):
"""Header uses syntax declared obsolete by RFC 5322"""
class NonASCIILocalPartDefect(HeaderDefect):
"""local_part contains non-ASCII characters"""
# This defect only occurs during unicode parsing, not when
# parsing messages decoded from binary.

516
upip/email/feedparser.py Normal file
View File

@ -0,0 +1,516 @@
# Copyright (C) 2004-2006 Python Software Foundation
# Authors: Baxter, Wouters and Warsaw
# Contact: email-sig@python.org
"""FeedParser - An email feed parser.
The feed parser implements an interface for incrementally parsing an email
message, line by line. This has advantages for certain applications, such as
those reading email messages off a socket.
FeedParser.feed() is the primary interface for pushing new data into the
parser. It returns when there's nothing more it can do with the available
data. When you have no more data to push into the parser, call .close().
This completes the parsing and returns the root message object.
The other advantage of this parser is that it will never raise a parsing
exception. Instead, when it finds something unexpected, it adds a 'defect' to
the current message. Defects are just instances that live on the message
object's .defects attribute.
"""
__all__ = ['FeedParser', 'BytesFeedParser']
import re
from email import errors
from email import message
from email._policybase import compat32
NLCRE = re.compile('\r\n|\r|\n')
NLCRE_bol = re.compile('(\r\n|\r|\n)')
NLCRE_eol = re.compile('(\r\n|\r|\n)\Z')
NLCRE_crack = re.compile('(\r\n|\r|\n)')
# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character
# except controls, SP, and ":".
headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])')
EMPTYSTRING = ''
NL = '\n'
NeedMoreData = object()
class BufferedSubFile(object):
"""A file-ish object that can have new data loaded into it.
You can also push and pop line-matching predicates onto a stack. When the
current predicate matches the current line, a false EOF response
(i.e. empty string) is returned instead. This lets the parser adhere to a
simple abstraction -- it parses until EOF closes the current message.
"""
def __init__(self):
# The last partial line pushed into this object.
self._partial = ''
# The list of full, pushed lines, in reverse order
self._lines = []
# The stack of false-EOF checking predicates.
self._eofstack = []
# A flag indicating whether the file has been closed or not.
self._closed = False
def push_eof_matcher(self, pred):
self._eofstack.append(pred)
def pop_eof_matcher(self):
return self._eofstack.pop()
def close(self):
# Don't forget any trailing partial line.
self._lines.append(self._partial)
self._partial = ''
self._closed = True
def readline(self):
if not self._lines:
if self._closed:
return ''
return NeedMoreData
# Pop the line off the stack and see if it matches the current
# false-EOF predicate.
line = self._lines.pop()
# RFC 2046, section 5.1.2 requires us to recognize outer level
# boundaries at any level of inner nesting. Do this, but be sure it's
# in the order of most to least nested.
for ateof in self._eofstack[::-1]:
if ateof(line):
# We're at the false EOF. But push the last line back first.
self._lines.append(line)
return ''
return line
def unreadline(self, line):
# Let the consumer push a line back into the buffer.
assert line is not NeedMoreData
self._lines.append(line)
def push(self, data):
"""Push some new data into this object."""
# Handle any previous leftovers
data, self._partial = self._partial + data, ''
# Crack into lines, but preserve the newlines on the end of each
parts = NLCRE_crack.split(data)
# The *ahem* interesting behaviour of re.split when supplied grouping
# parentheses is that the last element of the resulting list is the
# data after the final RE. In the case of a NL/CR terminated string,
# this is the empty string.
self._partial = parts.pop()
#GAN 29Mar09 bugs 1555570, 1721862 Confusion at 8K boundary ending with \r:
# is there a \n to follow later?
if not self._partial and parts and parts[-1].endswith('\r'):
self._partial = parts.pop(-2)+parts.pop()
# parts is a list of strings, alternating between the line contents
# and the eol character(s). Gather up a list of lines after
# re-attaching the newlines.
lines = []
for i in range(len(parts) // 2):
lines.append(parts[i*2] + parts[i*2+1])
self.pushlines(lines)
def pushlines(self, lines):
# Reverse and insert at the front of the lines.
self._lines[:0] = lines[::-1]
def __iter__(self):
return self
def __next__(self):
line = self.readline()
if line == '':
raise StopIteration
return line
class FeedParser:
"""A feed-style parser of email."""
def __init__(self, _factory=message.Message, policy=compat32):
"""_factory is called with no arguments to create a new message obj
The policy keyword specifies a policy object that controls a number of
aspects of the parser's operation. The default policy maintains
backward compatibility.
"""
self._factory = _factory
self.policy = policy
try:
_factory(policy=self.policy)
self._factory_kwds = lambda: {'policy': self.policy}
except TypeError:
# Assume this is an old-style factory
self._factory_kwds = lambda: {}
self._input = BufferedSubFile()
self._msgstack = []
self._parse = self._parsegen().__next__
self._cur = None
self._last = None
self._headersonly = False
# Non-public interface for supporting Parser's headersonly flag
def _set_headersonly(self):
self._headersonly = True
def feed(self, data):
"""Push more data into the parser."""
self._input.push(data)
self._call_parse()
def _call_parse(self):
try:
self._parse()
except StopIteration:
pass
def close(self):
"""Parse all remaining data and return the root message object."""
self._input.close()
self._call_parse()
root = self._pop_message()
assert not self._msgstack
# Look for final set of defects
if root.get_content_maintype() == 'multipart' \
and not root.is_multipart():
defect = errors.MultipartInvariantViolationDefect()
self.policy.handle_defect(root, defect)
return root
def _new_message(self):
msg = self._factory(**self._factory_kwds())
if self._cur and self._cur.get_content_type() == 'multipart/digest':
msg.set_default_type('message/rfc822')
if self._msgstack:
self._msgstack[-1].attach(msg)
self._msgstack.append(msg)
self._cur = msg
self._last = msg
def _pop_message(self):
retval = self._msgstack.pop()
if self._msgstack:
self._cur = self._msgstack[-1]
else:
self._cur = None
return retval
def _parsegen(self):
# Create a new message and start by parsing headers.
self._new_message()
headers = []
# Collect the headers, searching for a line that doesn't match the RFC
# 2822 header or continuation pattern (including an empty line).
for line in self._input:
if line is NeedMoreData:
yield NeedMoreData
continue
if not headerRE.match(line):
# If we saw the RFC defined header/body separator
# (i.e. newline), just throw it away. Otherwise the line is
# part of the body so push it back.
if not NLCRE.match(line):
defect = errors.MissingHeaderBodySeparatorDefect()
self.policy.handle_defect(self._cur, defect)
self._input.unreadline(line)
break
headers.append(line)
# Done with the headers, so parse them and figure out what we're
# supposed to see in the body of the message.
self._parse_headers(headers)
# Headers-only parsing is a backwards compatibility hack, which was
# necessary in the older parser, which could raise errors. All
# remaining lines in the input are thrown into the message body.
if self._headersonly:
lines = []
while True:
line = self._input.readline()
if line is NeedMoreData:
yield NeedMoreData
continue
if line == '':
break
lines.append(line)
self._cur.set_payload(EMPTYSTRING.join(lines))
return
if self._cur.get_content_type() == 'message/delivery-status':
# message/delivery-status contains blocks of headers separated by
# a blank line. We'll represent each header block as a separate
# nested message object, but the processing is a bit different
# than standard message/* types because there is no body for the
# nested messages. A blank line separates the subparts.
while True:
self._input.push_eof_matcher(NLCRE.match)
for retval in self._parsegen():
if retval is NeedMoreData:
yield NeedMoreData
continue
break
msg = self._pop_message()
# We need to pop the EOF matcher in order to tell if we're at
# the end of the current file, not the end of the last block
# of message headers.
self._input.pop_eof_matcher()
# The input stream must be sitting at the newline or at the
# EOF. We want to see if we're at the end of this subpart, so
# first consume the blank line, then test the next line to see
# if we're at this subpart's EOF.
while True:
line = self._input.readline()
if line is NeedMoreData:
yield NeedMoreData
continue
break
while True:
line = self._input.readline()
if line is NeedMoreData:
yield NeedMoreData
continue
break
if line == '':
break
# Not at EOF so this is a line we're going to need.
self._input.unreadline(line)
return
if self._cur.get_content_maintype() == 'message':
# The message claims to be a message/* type, then what follows is
# another RFC 2822 message.
for retval in self._parsegen():
if retval is NeedMoreData:
yield NeedMoreData
continue
break
self._pop_message()
return
if self._cur.get_content_maintype() == 'multipart':
boundary = self._cur.get_boundary()
if boundary is None:
# The message /claims/ to be a multipart but it has not
# defined a boundary. That's a problem which we'll handle by
# reading everything until the EOF and marking the message as
# defective.
defect = errors.NoBoundaryInMultipartDefect()
self.policy.handle_defect(self._cur, defect)
lines = []
for line in self._input:
if line is NeedMoreData:
yield NeedMoreData
continue
lines.append(line)
self._cur.set_payload(EMPTYSTRING.join(lines))
return
# Make sure a valid content type was specified per RFC 2045:6.4.
if (self._cur.get('content-transfer-encoding', '8bit').lower()
not in ('7bit', '8bit', 'binary')):
defect = errors.InvalidMultipartContentTransferEncodingDefect()
self.policy.handle_defect(self._cur, defect)
# Create a line match predicate which matches the inter-part
# boundary as well as the end-of-multipart boundary. Don't push
# this onto the input stream until we've scanned past the
# preamble.
separator = '--' + boundary
boundaryre = re.compile(
'(?P<sep>' + re.escape(separator) +
r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
capturing_preamble = True
preamble = []
linesep = False
close_boundary_seen = False
while True:
line = self._input.readline()
if line is NeedMoreData:
yield NeedMoreData
continue
if line == '':
break
mo = boundaryre.match(line)
if mo:
# If we're looking at the end boundary, we're done with
# this multipart. If there was a newline at the end of
# the closing boundary, then we need to initialize the
# epilogue with the empty string (see below).
if mo.group('end'):
close_boundary_seen = True
linesep = mo.group('linesep')
break
# We saw an inter-part boundary. Were we in the preamble?
if capturing_preamble:
if preamble:
# According to RFC 2046, the last newline belongs
# to the boundary.
lastline = preamble[-1]
eolmo = NLCRE_eol.search(lastline)
if eolmo:
preamble[-1] = lastline[:-len(eolmo.group(0))]
self._cur.preamble = EMPTYSTRING.join(preamble)
capturing_preamble = False
self._input.unreadline(line)
continue
# We saw a boundary separating two parts. Consume any
# multiple boundary lines that may be following. Our
# interpretation of RFC 2046 BNF grammar does not produce
# body parts within such double boundaries.
while True:
line = self._input.readline()
if line is NeedMoreData:
yield NeedMoreData
continue
mo = boundaryre.match(line)
if not mo:
self._input.unreadline(line)
break
# Recurse to parse this subpart; the input stream points
# at the subpart's first line.
self._input.push_eof_matcher(boundaryre.match)
for retval in self._parsegen():
if retval is NeedMoreData:
yield NeedMoreData
continue
break
# Because of RFC 2046, the newline preceding the boundary
# separator actually belongs to the boundary, not the
# previous subpart's payload (or epilogue if the previous
# part is a multipart).
if self._last.get_content_maintype() == 'multipart':
epilogue = self._last.epilogue
if epilogue == '':
self._last.epilogue = None
elif epilogue is not None:
mo = NLCRE_eol.search(epilogue)
if mo:
end = len(mo.group(0))
self._last.epilogue = epilogue[:-end]
else:
payload = self._last._payload
if isinstance(payload, str):
mo = NLCRE_eol.search(payload)
if mo:
payload = payload[:-len(mo.group(0))]
self._last._payload = payload
self._input.pop_eof_matcher()
self._pop_message()
# Set the multipart up for newline cleansing, which will
# happen if we're in a nested multipart.
self._last = self._cur
else:
# I think we must be in the preamble
assert capturing_preamble
preamble.append(line)
# We've seen either the EOF or the end boundary. If we're still
# capturing the preamble, we never saw the start boundary. Note
# that as a defect and store the captured text as the payload.
if capturing_preamble:
defect = errors.StartBoundaryNotFoundDefect()
self.policy.handle_defect(self._cur, defect)
self._cur.set_payload(EMPTYSTRING.join(preamble))
epilogue = []
for line in self._input:
if line is NeedMoreData:
yield NeedMoreData
continue
self._cur.epilogue = EMPTYSTRING.join(epilogue)
return
# If we're not processing the preamble, then we might have seen
# EOF without seeing that end boundary...that is also a defect.
if not close_boundary_seen:
defect = errors.CloseBoundaryNotFoundDefect()
self.policy.handle_defect(self._cur, defect)
return
# Everything from here to the EOF is epilogue. If the end boundary
# ended in a newline, we'll need to make sure the epilogue isn't
# None
if linesep:
epilogue = ['']
else:
epilogue = []
for line in self._input:
if line is NeedMoreData:
yield NeedMoreData
continue
epilogue.append(line)
# Any CRLF at the front of the epilogue is not technically part of
# the epilogue. Also, watch out for an empty string epilogue,
# which means a single newline.
if epilogue:
firstline = epilogue[0]
bolmo = NLCRE_bol.match(firstline)
if bolmo:
epilogue[0] = firstline[len(bolmo.group(0)):]
self._cur.epilogue = EMPTYSTRING.join(epilogue)
return
# Otherwise, it's some non-multipart type, so the entire rest of the
# file contents becomes the payload.
lines = []
for line in self._input:
if line is NeedMoreData:
yield NeedMoreData
continue
lines.append(line)
self._cur.set_payload(EMPTYSTRING.join(lines))
def _parse_headers(self, lines):
# Passed a list of lines that make up the headers for the current msg
lastheader = ''
lastvalue = []
for lineno, line in enumerate(lines):
# Check for continuation
if line[0] in ' \t':
if not lastheader:
# The first line of the headers was a continuation. This
# is illegal, so let's note the defect, store the illegal
# line, and ignore it for purposes of headers.
defect = errors.FirstHeaderLineIsContinuationDefect(line)
self.policy.handle_defect(self._cur, defect)
continue
lastvalue.append(line)
continue
if lastheader:
self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
lastheader, lastvalue = '', []
# Check for envelope header, i.e. unix-from
if line.startswith('From '):
if lineno == 0:
# Strip off the trailing newline
mo = NLCRE_eol.search(line)
if mo:
line = line[:-len(mo.group(0))]
self._cur.set_unixfrom(line)
continue
elif lineno == len(lines) - 1:
# Something looking like a unix-from at the end - it's
# probably the first line of the body, so push back the
# line and stop.
self._input.unreadline(line)
return
else:
# Weirdly placed unix-from line. Note this as a defect
# and ignore it.
defect = errors.MisplacedEnvelopeHeaderDefect(line)
self._cur.defects.append(defect)
continue
# Split the line on the colon separating field name from value.
# There will always be a colon, because if there wasn't the part of
# the parser that calls us would have started parsing the body.
i = line.find(':')
assert i>0, "_parse_headers fed line with no : and no leading WS"
lastheader = line[:i]
lastvalue = [line]
# Done with all the lines, so handle the last header.
if lastheader:
self._cur.set_raw(*self.policy.header_source_parse(lastvalue))
class BytesFeedParser(FeedParser):
"""Like FeedParser, but feed accepts bytes."""
def feed(self, data):
super().feed(data.decode('ascii', 'surrogateescape'))

583
upip/email/header.py Normal file
View File

@ -0,0 +1,583 @@
# Copyright (C) 2002-2007 Python Software Foundation
# Author: Ben Gertzfield, Barry Warsaw
# Contact: email-sig@python.org
"""Header encoding and decoding functionality."""
__all__ = [
'Header',
'decode_header',
'make_header',
]
import re
import binascii
import email.quoprimime
import email.base64mime
from email.errors import HeaderParseError
from email import charset as _charset
Charset = _charset.Charset
NL = '\n'
SPACE = ' '
BSPACE = b' '
SPACE8 = ' ' * 8
EMPTYSTRING = ''
MAXLINELEN = 78
FWS = ' \t'
USASCII = Charset('us-ascii')
UTF8 = Charset('utf-8')
# Match encoded-word strings in the form =?charset?q?Hello_World?=
ecre = re.compile(r'''
=\? # literal =?
(?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
\? # literal ?
(?P<encoding>[qb]) # either a "q" or a "b", case insensitive
\? # literal ?
(?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string
\?= # literal ?=
''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)
# Field name regexp, including trailing colon, but not separating whitespace,
# according to RFC 2822. Character range is from tilde to exclamation mark.
# For use with .match()
fcre = re.compile(r'[\041-\176]+:$')
# Find a header embedded in a putative header value. Used to check for
# header injection attack.
_embeded_header = re.compile(r'\n[^ \t]+:')
# Helpers
_max_append = email.quoprimime._max_append
def decode_header(header):
"""Decode a message header value without converting charset.
Returns a list of (string, charset) pairs containing each of the decoded
parts of the header. Charset is None for non-encoded parts of the header,
otherwise a lower-case string containing the name of the character set
specified in the encoded string.
header may be a string that may or may not contain RFC2047 encoded words,
or it may be a Header object.
An email.errors.HeaderParseError may be raised when certain decoding error
occurs (e.g. a base64 decoding exception).
"""
# If it is a Header object, we can just return the encoded chunks.
if hasattr(header, '_chunks'):
return [(_charset._encode(string, str(charset)), str(charset))
for string, charset in header._chunks]
# If no encoding, just return the header with no charset.
if not ecre.search(header):
return [(header, None)]
# First step is to parse all the encoded parts into triplets of the form
# (encoded_string, encoding, charset). For unencoded strings, the last
# two parts will be None.
words = []
for line in header.splitlines():
parts = ecre.split(line)
first = True
while parts:
unencoded = parts.pop(0)
if first:
unencoded = unencoded.lstrip()
first = False
if unencoded:
words.append((unencoded, None, None))
if parts:
charset = parts.pop(0).lower()
encoding = parts.pop(0).lower()
encoded = parts.pop(0)
words.append((encoded, encoding, charset))
# Now loop over words and remove words that consist of whitespace
# between two encoded strings.
import sys
droplist = []
for n, w in enumerate(words):
if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace():
droplist.append(n-1)
for d in reversed(droplist):
del words[d]
# The next step is to decode each encoded word by applying the reverse
# base64 or quopri transformation. decoded_words is now a list of the
# form (decoded_word, charset).
decoded_words = []
for encoded_string, encoding, charset in words:
if encoding is None:
# This is an unencoded word.
decoded_words.append((encoded_string, charset))
elif encoding == 'q':
word = email.quoprimime.header_decode(encoded_string)
decoded_words.append((word, charset))
elif encoding == 'b':
paderr = len(encoded_string) % 4 # Postel's law: add missing padding
if paderr:
encoded_string += '==='[:4 - paderr]
try:
word = email.base64mime.decode(encoded_string)
except binascii.Error:
raise HeaderParseError('Base64 decoding error')
else:
decoded_words.append((word, charset))
else:
raise AssertionError('Unexpected encoding: ' + encoding)
# Now convert all words to bytes and collapse consecutive runs of
# similarly encoded words.
collapsed = []
last_word = last_charset = None
for word, charset in decoded_words:
if isinstance(word, str):
word = bytes(word, 'raw-unicode-escape')
if last_word is None:
last_word = word
last_charset = charset
elif charset != last_charset:
collapsed.append((last_word, last_charset))
last_word = word
last_charset = charset
elif last_charset is None:
last_word += BSPACE + word
else:
last_word += word
collapsed.append((last_word, last_charset))
return collapsed
def make_header(decoded_seq, maxlinelen=None, header_name=None,
continuation_ws=' '):
"""Create a Header from a sequence of pairs as returned by decode_header()
decode_header() takes a header value string and returns a sequence of
pairs of the format (decoded_string, charset) where charset is the string
name of the character set.
This function takes one of those sequence of pairs and returns a Header
instance. Optional maxlinelen, header_name, and continuation_ws are as in
the Header constructor.
"""
h = Header(maxlinelen=maxlinelen, header_name=header_name,
continuation_ws=continuation_ws)
for s, charset in decoded_seq:
# None means us-ascii but we can simply pass it on to h.append()
if charset is not None and not isinstance(charset, Charset):
charset = Charset(charset)
h.append(s, charset)
return h
class Header:
def __init__(self, s=None, charset=None,
maxlinelen=None, header_name=None,
continuation_ws=' ', errors='strict'):
"""Create a MIME-compliant header that can contain many character sets.
Optional s is the initial header value. If None, the initial header
value is not set. You can later append to the header with .append()
method calls. s may be a byte string or a Unicode string, but see the
.append() documentation for semantics.
Optional charset serves two purposes: it has the same meaning as the
charset argument to the .append() method. It also sets the default
character set for all subsequent .append() calls that omit the charset
argument. If charset is not provided in the constructor, the us-ascii
charset is used both as s's initial charset and as the default for
subsequent .append() calls.
The maximum line length can be specified explicitly via maxlinelen. For
splitting the first line to a shorter value (to account for the field
header which isn't included in s, e.g. `Subject') pass in the name of
the field in header_name. The default maxlinelen is 78 as recommended
by RFC 2822.
continuation_ws must be RFC 2822 compliant folding whitespace (usually
either a space or a hard tab) which will be prepended to continuation
lines.
errors is passed through to the .append() call.
"""
if charset is None:
charset = USASCII
elif not isinstance(charset, Charset):
charset = Charset(charset)
self._charset = charset
self._continuation_ws = continuation_ws
self._chunks = []
if s is not None:
self.append(s, charset, errors)
if maxlinelen is None:
maxlinelen = MAXLINELEN
self._maxlinelen = maxlinelen
if header_name is None:
self._headerlen = 0
else:
# Take the separating colon and space into account.
self._headerlen = len(header_name) + 2
def __str__(self):
"""Return the string value of the header."""
self._normalize()
uchunks = []
lastcs = None
lastspace = None
for string, charset in self._chunks:
# We must preserve spaces between encoded and non-encoded word
# boundaries, which means for us we need to add a space when we go
# from a charset to None/us-ascii, or from None/us-ascii to a
# charset. Only do this for the second and subsequent chunks.
# Don't add a space if the None/us-ascii string already has
# a space (trailing or leading depending on transition)
nextcs = charset
if nextcs == _charset.UNKNOWN8BIT:
original_bytes = string.encode('ascii', 'surrogateescape')
string = original_bytes.decode('ascii', 'replace')
if uchunks:
hasspace = string and self._nonctext(string[0])
if lastcs not in (None, 'us-ascii'):
if nextcs in (None, 'us-ascii') and not hasspace:
uchunks.append(SPACE)
nextcs = None
elif nextcs not in (None, 'us-ascii') and not lastspace:
uchunks.append(SPACE)
lastspace = string and self._nonctext(string[-1])
lastcs = nextcs
uchunks.append(string)
return EMPTYSTRING.join(uchunks)
# Rich comparison operators for equality only. BAW: does it make sense to
# have or explicitly disable <, <=, >, >= operators?
def __eq__(self, other):
# other may be a Header or a string. Both are fine so coerce
# ourselves to a unicode (of the unencoded header value), swap the
# args and do another comparison.
return other == str(self)
def __ne__(self, other):
return not self == other
def append(self, s, charset=None, errors='strict'):
"""Append a string to the MIME header.
Optional charset, if given, should be a Charset instance or the name
of a character set (which will be converted to a Charset instance). A
value of None (the default) means that the charset given in the
constructor is used.
s may be a byte string or a Unicode string. If it is a byte string
(i.e. isinstance(s, str) is false), then charset is the encoding of
that byte string, and a UnicodeError will be raised if the string
cannot be decoded with that charset. If s is a Unicode string, then
charset is a hint specifying the character set of the characters in
the string. In either case, when producing an RFC 2822 compliant
header using RFC 2047 rules, the string will be encoded using the
output codec of the charset. If the string cannot be encoded to the
output codec, a UnicodeError will be raised.
Optional `errors' is passed as the errors argument to the decode
call if s is a byte string.
"""
if charset is None:
charset = self._charset
elif not isinstance(charset, Charset):
charset = Charset(charset)
if not isinstance(s, str):
input_charset = charset.input_codec or 'us-ascii'
if input_charset == _charset.UNKNOWN8BIT:
s = s.decode('us-ascii', 'surrogateescape')
else:
s = s.decode(input_charset, errors)
# Ensure that the bytes we're storing can be decoded to the output
# character set, otherwise an early error is raised.
output_charset = charset.output_codec or 'us-ascii'
if output_charset != _charset.UNKNOWN8BIT:
try:
s.encode(output_charset, errors)
except UnicodeEncodeError:
if output_charset!='us-ascii':
raise
charset = UTF8
self._chunks.append((s, charset))
def _nonctext(self, s):
"""True if string s is not a ctext character of RFC822.
"""
return s in (' ', '\t', '(', ')', '\\')
def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
r"""Encode a message header into an RFC-compliant format.
There are many issues involved in converting a given string for use in
an email header. Only certain character sets are readable in most
email clients, and as header strings can only contain a subset of
7-bit ASCII, care must be taken to properly convert and encode (with
Base64 or quoted-printable) header strings. In addition, there is a
75-character length limit on any given encoded header field, so
line-wrapping must be performed, even with double-byte character sets.
Optional maxlinelen specifies the maximum length of each generated
line, exclusive of the linesep string. Individual lines may be longer
than maxlinelen if a folding point cannot be found. The first line
will be shorter by the length of the header name plus ": " if a header
name was specified at Header construction time. The default value for
maxlinelen is determined at header construction time.
Optional splitchars is a string containing characters which should be
given extra weight by the splitting algorithm during normal header
wrapping. This is in very rough support of RFC 2822's `higher level
syntactic breaks': split points preceded by a splitchar are preferred
during line splitting, with the characters preferred in the order in
which they appear in the string. Space and tab may be included in the
string to indicate whether preference should be given to one over the
other as a split point when other split chars do not appear in the line
being split. Splitchars does not affect RFC 2047 encoded lines.
Optional linesep is a string to be used to separate the lines of
the value. The default value is the most useful for typical
Python applications, but it can be set to \r\n to produce RFC-compliant
line separators when needed.
"""
self._normalize()
if maxlinelen is None:
maxlinelen = self._maxlinelen
# A maxlinelen of 0 means don't wrap. For all practical purposes,
# choosing a huge number here accomplishes that and makes the
# _ValueFormatter algorithm much simpler.
if maxlinelen == 0:
maxlinelen = 1000000
formatter = _ValueFormatter(self._headerlen, maxlinelen,
self._continuation_ws, splitchars)
lastcs = None
hasspace = lastspace = None
for string, charset in self._chunks:
if hasspace is not None:
hasspace = string and self._nonctext(string[0])
import sys
if lastcs not in (None, 'us-ascii'):
if not hasspace or charset not in (None, 'us-ascii'):
formatter.add_transition()
elif charset not in (None, 'us-ascii') and not lastspace:
formatter.add_transition()
lastspace = string and self._nonctext(string[-1])
lastcs = charset
hasspace = False
lines = string.splitlines()
if lines:
formatter.feed('', lines[0], charset)
else:
formatter.feed('', '', charset)
for line in lines[1:]:
formatter.newline()
if charset.header_encoding is not None:
formatter.feed(self._continuation_ws, ' ' + line.lstrip(),
charset)
else:
sline = line.lstrip()
fws = line[:len(line)-len(sline)]
formatter.feed(fws, sline, charset)
if len(lines) > 1:
formatter.newline()
if self._chunks:
formatter.add_transition()
value = formatter._str(linesep)
if _embeded_header.search(value):
raise HeaderParseError("header value appears to contain "
"an embedded header: {!r}".format(value))
return value
def _normalize(self):
# Step 1: Normalize the chunks so that all runs of identical charsets
# get collapsed into a single unicode string.
chunks = []
last_charset = None
last_chunk = []
for string, charset in self._chunks:
if charset == last_charset:
last_chunk.append(string)
else:
if last_charset is not None:
chunks.append((SPACE.join(last_chunk), last_charset))
last_chunk = [string]
last_charset = charset
if last_chunk:
chunks.append((SPACE.join(last_chunk), last_charset))
self._chunks = chunks
class _ValueFormatter:
def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
self._maxlen = maxlen
self._continuation_ws = continuation_ws
self._continuation_ws_len = len(continuation_ws)
self._splitchars = splitchars
self._lines = []
self._current_line = _Accumulator(headerlen)
def _str(self, linesep):
self.newline()
return linesep.join(self._lines)
def __str__(self):
return self._str(NL)
def newline(self):
end_of_line = self._current_line.pop()
if end_of_line != (' ', ''):
self._current_line.push(*end_of_line)
if len(self._current_line) > 0:
if self._current_line.is_onlyws():
self._lines[-1] += str(self._current_line)
else:
self._lines.append(str(self._current_line))
self._current_line.reset()
def add_transition(self):
self._current_line.push(' ', '')
def feed(self, fws, string, charset):
# If the charset has no header encoding (i.e. it is an ASCII encoding)
# then we must split the header at the "highest level syntactic break"
# possible. Note that we don't have a lot of smarts about field
# syntax; we just try to break on semi-colons, then commas, then
# whitespace. Eventually, this should be pluggable.
if charset.header_encoding is None:
self._ascii_split(fws, string, self._splitchars)
return
# Otherwise, we're doing either a Base64 or a quoted-printable
# encoding which means we don't need to split the line on syntactic
# breaks. We can basically just find enough characters to fit on the
# current line, minus the RFC 2047 chrome. What makes this trickier
# though is that we have to split at octet boundaries, not character
# boundaries but it's only safe to split at character boundaries so at
# best we can only get close.
encoded_lines = charset.header_encode_lines(string, self._maxlengths())
# The first element extends the current line, but if it's None then
# nothing more fit on the current line so start a new line.
try:
first_line = encoded_lines.pop(0)
except IndexError:
# There are no encoded lines, so we're done.
return
if first_line is not None:
self._append_chunk(fws, first_line)
try:
last_line = encoded_lines.pop()
except IndexError:
# There was only one line.
return
self.newline()
self._current_line.push(self._continuation_ws, last_line)
# Everything else are full lines in themselves.
for line in encoded_lines:
self._lines.append(self._continuation_ws + line)
def _maxlengths(self):
# The first line's length.
yield self._maxlen - len(self._current_line)
while True:
yield self._maxlen - self._continuation_ws_len
def _ascii_split(self, fws, string, splitchars):
# The RFC 2822 header folding algorithm is simple in principle but
# complex in practice. Lines may be folded any place where "folding
# white space" appears by inserting a linesep character in front of the
# FWS. The complication is that not all spaces or tabs qualify as FWS,
# and we are also supposed to prefer to break at "higher level
# syntactic breaks". We can't do either of these without intimate
# knowledge of the structure of structured headers, which we don't have
# here. So the best we can do here is prefer to break at the specified
# splitchars, and hope that we don't choose any spaces or tabs that
# aren't legal FWS. (This is at least better than the old algorithm,
# where we would sometimes *introduce* FWS after a splitchar, or the
# algorithm before that, where we would turn all white space runs into
# single spaces or tabs.)
parts = re.split("(["+FWS+"]+)", fws+string)
if parts[0]:
parts[:0] = ['']
else:
parts.pop(0)
for fws, part in zip(*[iter(parts)]*2):
self._append_chunk(fws, part)
def _append_chunk(self, fws, string):
self._current_line.push(fws, string)
if len(self._current_line) > self._maxlen:
# Find the best split point, working backward from the end.
# There might be none, on a long first line.
for ch in self._splitchars:
for i in range(self._current_line.part_count()-1, 0, -1):
if ch.isspace():
fws = self._current_line[i][0]
if fws and fws[0]==ch:
break
prevpart = self._current_line[i-1][1]
if prevpart and prevpart[-1]==ch:
break
else:
continue
break
else:
fws, part = self._current_line.pop()
if self._current_line._initial_size > 0:
# There will be a header, so leave it on a line by itself.
self.newline()
if not fws:
# We don't use continuation_ws here because the whitespace
# after a header should always be a space.
fws = ' '
self._current_line.push(fws, part)
return
remainder = self._current_line.pop_from(i)
self._lines.append(str(self._current_line))
self._current_line.reset(remainder)
class _Accumulator(list):
def __init__(self, initial_size=0):
self._initial_size = initial_size
super().__init__()
def push(self, fws, string):
self.append((fws, string))
def pop_from(self, i=0):
popped = self[i:]
self[i:] = []
return popped
def pop(self):
if self.part_count()==0:
return ('', '')
return super().pop()
def __len__(self):
return sum((len(fws)+len(part) for fws, part in self),
self._initial_size)
def __str__(self):
return EMPTYSTRING.join((EMPTYSTRING.join((fws, part))
for fws, part in self))
def reset(self, startval=None):
if startval is None:
startval = []
self[:] = startval
self._initial_size = 0
def is_onlyws(self):
return self._initial_size==0 and (not self or str(self).isspace())
def part_count(self):
return super().__len__()

73
upip/email/iterators.py Normal file
View File

@ -0,0 +1,73 @@
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org
"""Various types of useful iterators and generators."""
__all__ = [
'body_line_iterator',
'typed_subpart_iterator',
'walk',
# Do not include _structure() since it's part of the debugging API.
]
import sys
from io import StringIO
# This function will become a method of the Message class
def walk(self):
"""Walk over the message tree, yielding each subpart.
The walk is performed in depth-first order. This method is a
generator.
"""
yield self
if self.is_multipart():
for subpart in self.get_payload():
for subsubpart in subpart.walk():
yield subsubpart
# These two functions are imported into the Iterators.py interface module.
def body_line_iterator(msg, decode=False):
"""Iterate over the parts, returning string payloads line-by-line.
Optional decode (default False) is passed through to .get_payload().
"""
for subpart in msg.walk():
payload = subpart.get_payload(decode=decode)
if isinstance(payload, str):
for line in StringIO(payload):
yield line
def typed_subpart_iterator(msg, maintype='text', subtype=None):
"""Iterate over the subparts with a given MIME type.
Use `maintype' as the main MIME type to match against; this defaults to
"text". Optional `subtype' is the MIME subtype to match against; if
omitted, only the main type is matched.
"""
for subpart in msg.walk():
if subpart.get_content_maintype() == maintype:
if subtype is None or subpart.get_content_subtype() == subtype:
yield subpart
def _structure(msg, fp=None, level=0, include_default=False):
"""A handy debugging aid"""
if fp is None:
fp = sys.stdout
tab = ' ' * (level * 4)
print(tab + msg.get_content_type(), end='', file=fp)
if include_default:
print(' [%s]' % msg.get_default_type(), file=fp)
else:
print(file=fp)
if msg.is_multipart():
for subpart in msg.get_payload():
_structure(subpart, fp, level+1, include_default)

879
upip/email/message.py Normal file
View File

@ -0,0 +1,879 @@
# Copyright (C) 2001-2007 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org
"""Basic message object for the email package object model."""
__all__ = ['Message']
import re
import uu
import base64
import binascii
from io import BytesIO, StringIO
# Intrapackage imports
from email import utils
from email import errors
from email._policybase import compat32
from email import charset as _charset
from email._encoded_words import decode_b
Charset = _charset.Charset
SEMISPACE = '; '
# Regular expression that matches `special' characters in parameters, the
# existence of which force quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
def _splitparam(param):
# Split header parameters. BAW: this may be too simple. It isn't
# strictly RFC 2045 (section 5.1) compliant, but it catches most headers
# found in the wild. We may eventually need a full fledged parser.
# RDM: we might have a Header here; for now just stringify it.
a, sep, b = str(param).partition(';')
if not sep:
return a.strip(), None
return a.strip(), b.strip()
def _formatparam(param, value=None, quote=True):
"""Convenience function to format and return a key=value pair.
This will quote the value if needed or if quote is true. If value is a
three tuple (charset, language, value), it will be encoded according
to RFC2231 rules. If it contains non-ascii characters it will likewise
be encoded according to RFC2231 rules, using the utf-8 charset and
a null language.
"""
if value is not None and len(value) > 0:
# A tuple is used for RFC 2231 encoded parameter values where items
# are (charset, language, value). charset is a string, not a Charset
# instance. RFC 2231 encoded values are never quoted, per RFC.
if isinstance(value, tuple):
# Encode as per RFC 2231
param += '*'
value = utils.encode_rfc2231(value[2], value[0], value[1])
return '%s=%s' % (param, value)
else:
try:
value.encode('ascii')
except UnicodeEncodeError:
param += '*'
value = utils.encode_rfc2231(value, 'utf-8', '')
return '%s=%s' % (param, value)
# BAW: Please check this. I think that if quote is set it should
# force quoting even if not necessary.
if quote or tspecials.search(value):
return '%s="%s"' % (param, utils.quote(value))
else:
return '%s=%s' % (param, value)
else:
return param
def _parseparam(s):
# RDM This might be a Header, so for now stringify it.
s = ';' + str(s)
plist = []
while s[:1] == ';':
s = s[1:]
end = s.find(';')
while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
end = s.find(';', end + 1)
if end < 0:
end = len(s)
f = s[:end]
if '=' in f:
i = f.index('=')
f = f[:i].strip().lower() + '=' + f[i+1:].strip()
plist.append(f.strip())
s = s[end:]
return plist
def _unquotevalue(value):
# This is different than utils.collapse_rfc2231_value() because it doesn't
# try to convert the value to a unicode. Message.get_param() and
# Message.get_params() are both currently defined to return the tuple in
# the face of RFC 2231 parameters.
if isinstance(value, tuple):
return value[0], value[1], utils.unquote(value[2])
else:
return utils.unquote(value)
class Message:
"""Basic message object.
A message object is defined as something that has a bunch of RFC 2822
headers and a payload. It may optionally have an envelope header
(a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
multipart or a message/rfc822), then the payload is a list of Message
objects, otherwise it is a string.
Message objects implement part of the `mapping' interface, which assumes
there is exactly one occurrence of the header per message. Some headers
do in fact appear multiple times (e.g. Received) and for those headers,
you must use the explicit API to set or get all the headers. Not all of
the mapping methods are implemented.
"""
def __init__(self, policy=compat32):
self.policy = policy
self._headers = []
self._unixfrom = None
self._payload = None
self._charset = None
# Defaults for multipart messages
self.preamble = self.epilogue = None
self.defects = []
# Default content type
self._default_type = 'text/plain'
def __str__(self):
"""Return the entire formatted message as a string.
This includes the headers, body, and envelope header.
"""
return self.as_string()
def as_string(self, unixfrom=False, maxheaderlen=0):
"""Return the entire formatted message as a string.
Optional `unixfrom' when True, means include the Unix From_ envelope
header.
This is a convenience method and may not generate the message exactly
as you intend. For more flexibility, use the flatten() method of a
Generator instance.
"""
from email.generator import Generator
fp = StringIO()
g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen)
g.flatten(self, unixfrom=unixfrom)
return fp.getvalue()
def is_multipart(self):
"""Return True if the message consists of multiple parts."""
return isinstance(self._payload, list)
#
# Unix From_ line
#
def set_unixfrom(self, unixfrom):
self._unixfrom = unixfrom
def get_unixfrom(self):
return self._unixfrom
#
# Payload manipulation.
#
def attach(self, payload):
"""Add the given payload to the current payload.
The current payload will always be a list of objects after this method
is called. If you want to set the payload to a scalar object, use
set_payload() instead.
"""
if self._payload is None:
self._payload = [payload]
else:
self._payload.append(payload)
def get_payload(self, i=None, decode=False):
"""Return a reference to the payload.
The payload will either be a list object or a string. If you mutate
the list object, you modify the message's payload in place. Optional
i returns that index into the payload.
Optional decode is a flag indicating whether the payload should be
decoded or not, according to the Content-Transfer-Encoding header
(default is False).
When True and the message is not a multipart, the payload will be
decoded if this header's value is `quoted-printable' or `base64'. If
some other encoding is used, or the header is missing, or if the
payload has bogus data (i.e. bogus base64 or uuencoded data), the
payload is returned as-is.
If the message is a multipart and the decode flag is True, then None
is returned.
"""
# Here is the logic table for this code, based on the email5.0.0 code:
# i decode is_multipart result
# ------ ------ ------------ ------------------------------
# None True True None
# i True True None
# None False True _payload (a list)
# i False True _payload element i (a Message)
# i False False error (not a list)
# i True False error (not a list)
# None False False _payload
# None True False _payload decoded (bytes)
# Note that Barry planned to factor out the 'decode' case, but that
# isn't so easy now that we handle the 8 bit data, which needs to be
# converted in both the decode and non-decode path.
if self.is_multipart():
if decode:
return None
if i is None:
return self._payload
else:
return self._payload[i]
# For backward compatibility, Use isinstance and this error message
# instead of the more logical is_multipart test.
if i is not None and not isinstance(self._payload, list):
raise TypeError('Expected list, got %s' % type(self._payload))
payload = self._payload
# cte might be a Header, so for now stringify it.
cte = str(self.get('content-transfer-encoding', '')).lower()
# payload may be bytes here.
if isinstance(payload, str):
if utils._has_surrogates(payload):
bpayload = payload.encode('ascii', 'surrogateescape')
if not decode:
try:
payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
except LookupError:
payload = bpayload.decode('ascii', 'replace')
elif decode:
try:
bpayload = payload.encode('ascii')
except UnicodeError:
# This won't happen for RFC compliant messages (messages
# containing only ASCII codepoints in the unicode input).
# If it does happen, turn the string into bytes in a way
# guaranteed not to fail.
bpayload = payload.encode('raw-unicode-escape')
if not decode:
return payload
if cte == 'quoted-printable':
return utils._qdecode(bpayload)
elif cte == 'base64':
# XXX: this is a bit of a hack; decode_b should probably be factored
# out somewhere, but I haven't figured out where yet.
value, defects = decode_b(b''.join(bpayload.splitlines()))
for defect in defects:
self.policy.handle_defect(self, defect)
return value
elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
in_file = BytesIO(bpayload)
out_file = BytesIO()
try:
uu.decode(in_file, out_file, quiet=True)
return out_file.getvalue()
except uu.Error:
# Some decoding problem
return bpayload
if isinstance(payload, str):
return bpayload
return payload
def set_payload(self, payload, charset=None):
"""Set the payload to the given value.
Optional charset sets the message's default character set. See
set_charset() for details.
"""
if isinstance(payload, bytes):
payload = payload.decode('ascii', 'surrogateescape')
self._payload = payload
if charset is not None:
self.set_charset(charset)
def set_charset(self, charset):
"""Set the charset of the payload to a given character set.
charset can be a Charset instance, a string naming a character set, or
None. If it is a string it will be converted to a Charset instance.
If charset is None, the charset parameter will be removed from the
Content-Type field. Anything else will generate a TypeError.
The message will be assumed to be of type text/* encoded with
charset.input_charset. It will be converted to charset.output_charset
and encoded properly, if needed, when generating the plain text
representation of the message. MIME headers (MIME-Version,
Content-Type, Content-Transfer-Encoding) will be added as needed.
"""
if charset is None:
self.del_param('charset')
self._charset = None
return
if not isinstance(charset, Charset):
charset = Charset(charset)
self._charset = charset
if 'MIME-Version' not in self:
self.add_header('MIME-Version', '1.0')
if 'Content-Type' not in self:
self.add_header('Content-Type', 'text/plain',
charset=charset.get_output_charset())
else:
self.set_param('charset', charset.get_output_charset())
if charset != charset.get_output_charset():
self._payload = charset.body_encode(self._payload)
if 'Content-Transfer-Encoding' not in self:
cte = charset.get_body_encoding()
try:
cte(self)
except TypeError:
self._payload = charset.body_encode(self._payload)
self.add_header('Content-Transfer-Encoding', cte)
def get_charset(self):
"""Return the Charset instance associated with the message's payload.
"""
return self._charset
#
# MAPPING INTERFACE (partial)
#
def __len__(self):
"""Return the total number of headers, including duplicates."""
return len(self._headers)
def __getitem__(self, name):
"""Get a header value.
Return None if the header is missing instead of raising an exception.
Note that if the header appeared multiple times, exactly which
occurrence gets returned is undefined. Use get_all() to get all
the values matching a header field name.
"""
return self.get(name)
def __setitem__(self, name, val):
"""Set the value of a header.
Note: this does not overwrite an existing header with the same field
name. Use __delitem__() first to delete any existing headers.
"""
max_count = self.policy.header_max_count(name)
if max_count:
lname = name.lower()
found = 0
for k, v in self._headers:
if k.lower() == lname:
found += 1
if found >= max_count:
raise ValueError("There may be at most {} {} headers "
"in a message".format(max_count, name))
self._headers.append(self.policy.header_store_parse(name, val))
def __delitem__(self, name):
"""Delete all occurrences of a header, if present.
Does not raise an exception if the header is missing.
"""
name = name.lower()
newheaders = []
for k, v in self._headers:
if k.lower() != name:
newheaders.append((k, v))
self._headers = newheaders
def __contains__(self, name):
return name.lower() in [k.lower() for k, v in self._headers]
def __iter__(self):
for field, value in self._headers:
yield field
def keys(self):
"""Return a list of all the message's header field names.
These will be sorted in the order they appeared in the original
message, or were added to the message, and may contain duplicates.
Any fields deleted and re-inserted are always appended to the header
list.
"""
return [k for k, v in self._headers]
def values(self):
"""Return a list of all the message's header values.
These will be sorted in the order they appeared in the original
message, or were added to the message, and may contain duplicates.
Any fields deleted and re-inserted are always appended to the header
list.
"""
return [self.policy.header_fetch_parse(k, v)
for k, v in self._headers]
def items(self):
"""Get all the message's header fields and values.
These will be sorted in the order they appeared in the original
message, or were added to the message, and may contain duplicates.
Any fields deleted and re-inserted are always appended to the header
list.
"""
return [(k, self.policy.header_fetch_parse(k, v))
for k, v in self._headers]
def get(self, name, failobj=None):
"""Get a header value.
Like __getitem__() but return failobj instead of None when the field
is missing.
"""
name = name.lower()
for k, v in self._headers:
if k.lower() == name:
return self.policy.header_fetch_parse(k, v)
return failobj
#
# "Internal" methods (public API, but only intended for use by a parser
# or generator, not normal application code.
#
def set_raw(self, name, value):
"""Store name and value in the model without modification.
This is an "internal" API, intended only for use by a parser.
"""
self._headers.append((name, value))
def raw_items(self):
"""Return the (name, value) header pairs without modification.
This is an "internal" API, intended only for use by a generator.
"""
return iter(self._headers.copy())
#
# Additional useful stuff
#
def get_all(self, name, failobj=None):
"""Return a list of all the values for the named field.
These will be sorted in the order they appeared in the original
message, and may contain duplicates. Any fields deleted and
re-inserted are always appended to the header list.
If no such fields exist, failobj is returned (defaults to None).
"""
values = []
name = name.lower()
for k, v in self._headers:
if k.lower() == name:
values.append(self.policy.header_fetch_parse(k, v))
if not values:
return failobj
return values
def add_header(self, _name, _value, **_params):
"""Extended header setting.
name is the header field to add. keyword arguments can be used to set
additional parameters for the header field, with underscores converted
to dashes. Normally the parameter will be added as key="value" unless
value is None, in which case only the key will be added. If a
parameter value contains non-ASCII characters it can be specified as a
three-tuple of (charset, language, value), in which case it will be
encoded according to RFC2231 rules. Otherwise it will be encoded using
the utf-8 charset and a language of ''.
Examples:
msg.add_header('content-disposition', 'attachment', filename='bud.gif')
msg.add_header('content-disposition', 'attachment',
filename=('utf-8', '', Fußballer.ppt'))
msg.add_header('content-disposition', 'attachment',
filename='Fußballer.ppt'))
"""
parts = []
for k, v in _params.items():
if v is None:
parts.append(k.replace('_', '-'))
else:
parts.append(_formatparam(k.replace('_', '-'), v))
if _value is not None:
parts.insert(0, _value)
self[_name] = SEMISPACE.join(parts)
def replace_header(self, _name, _value):
"""Replace a header.
Replace the first matching header found in the message, retaining
header order and case. If no matching header was found, a KeyError is
raised.
"""
_name = _name.lower()
for i, (k, v) in zip(range(len(self._headers)), self._headers):
if k.lower() == _name:
self._headers[i] = self.policy.header_store_parse(k, _value)
break
else:
raise KeyError(_name)
#
# Use these three methods instead of the three above.
#
def get_content_type(self):
"""Return the message's content type.
The returned string is coerced to lower case of the form
`maintype/subtype'. If there was no Content-Type header in the
message, the default type as given by get_default_type() will be
returned. Since according to RFC 2045, messages always have a default
type this will always return a value.
RFC 2045 defines a message's default type to be text/plain unless it
appears inside a multipart/digest container, in which case it would be
message/rfc822.
"""
missing = object()
value = self.get('content-type', missing)
if value is missing:
# This should have no parameters
return self.get_default_type()
ctype = _splitparam(value)[0].lower()
# RFC 2045, section 5.2 says if its invalid, use text/plain
if ctype.count('/') != 1:
return 'text/plain'
return ctype
def get_content_maintype(self):
"""Return the message's main content type.
This is the `maintype' part of the string returned by
get_content_type().
"""
ctype = self.get_content_type()
return ctype.split('/')[0]
def get_content_subtype(self):
"""Returns the message's sub-content type.
This is the `subtype' part of the string returned by
get_content_type().
"""
ctype = self.get_content_type()
return ctype.split('/')[1]
def get_default_type(self):
"""Return the `default' content type.
Most messages have a default content type of text/plain, except for
messages that are subparts of multipart/digest containers. Such
subparts have a default content type of message/rfc822.
"""
return self._default_type
def set_default_type(self, ctype):
"""Set the `default' content type.
ctype should be either "text/plain" or "message/rfc822", although this
is not enforced. The default content type is not stored in the
Content-Type header.
"""
self._default_type = ctype
def _get_params_preserve(self, failobj, header):
# Like get_params() but preserves the quoting of values. BAW:
# should this be part of the public interface?
missing = object()
value = self.get(header, missing)
if value is missing:
return failobj
params = []
for p in _parseparam(value):
try:
name, val = p.split('=', 1)
name = name.strip()
val = val.strip()
except ValueError:
# Must have been a bare attribute
name = p.strip()
val = ''
params.append((name, val))
params = utils.decode_params(params)
return params
def get_params(self, failobj=None, header='content-type', unquote=True):
"""Return the message's Content-Type parameters, as a list.
The elements of the returned list are 2-tuples of key/value pairs, as
split on the `=' sign. The left hand side of the `=' is the key,
while the right hand side is the value. If there is no `=' sign in
the parameter the value is the empty string. The value is as
described in the get_param() method.
Optional failobj is the object to return if there is no Content-Type
header. Optional header is the header to search instead of
Content-Type. If unquote is True, the value is unquoted.
"""
missing = object()
params = self._get_params_preserve(missing, header)
if params is missing:
return failobj
if unquote:
return [(k, _unquotevalue(v)) for k, v in params]
else:
return params
def get_param(self, param, failobj=None, header='content-type',
unquote=True):
"""Return the parameter value if found in the Content-Type header.
Optional failobj is the object to return if there is no Content-Type
header, or the Content-Type header has no such parameter. Optional
header is the header to search instead of Content-Type.
Parameter keys are always compared case insensitively. The return
value can either be a string, or a 3-tuple if the parameter was RFC
2231 encoded. When it's a 3-tuple, the elements of the value are of
the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
LANGUAGE can be None, in which case you should consider VALUE to be
encoded in the us-ascii charset. You can usually ignore LANGUAGE.
The parameter value (either the returned string, or the VALUE item in
the 3-tuple) is always unquoted, unless unquote is set to False.
If your application doesn't care whether the parameter was RFC 2231
encoded, it can turn the return value into a string as follows:
param = msg.get_param('foo')
param = email.utils.collapse_rfc2231_value(rawparam)
"""
if header not in self:
return failobj
for k, v in self._get_params_preserve(failobj, header):
if k.lower() == param.lower():
if unquote:
return _unquotevalue(v)
else:
return v
return failobj
def set_param(self, param, value, header='Content-Type', requote=True,
charset=None, language=''):
"""Set a parameter in the Content-Type header.
If the parameter already exists in the header, its value will be
replaced with the new value.
If header is Content-Type and has not yet been defined for this
message, it will be set to "text/plain" and the new parameter and
value will be appended as per RFC 2045.
An alternate header can specified in the header argument, and all
parameters will be quoted as necessary unless requote is False.
If charset is specified, the parameter will be encoded according to RFC
2231. Optional language specifies the RFC 2231 language, defaulting
to the empty string. Both charset and language should be strings.
"""
if not isinstance(value, tuple) and charset:
value = (charset, language, value)
if header not in self and header.lower() == 'content-type':
ctype = 'text/plain'
else:
ctype = self.get(header)
if not self.get_param(param, header=header):
if not ctype:
ctype = _formatparam(param, value, requote)
else:
ctype = SEMISPACE.join(
[ctype, _formatparam(param, value, requote)])
else:
ctype = ''
for old_param, old_value in self.get_params(header=header,
unquote=requote):
append_param = ''
if old_param.lower() == param.lower():
append_param = _formatparam(param, value, requote)
else:
append_param = _formatparam(old_param, old_value, requote)
if not ctype:
ctype = append_param
else:
ctype = SEMISPACE.join([ctype, append_param])
if ctype != self.get(header):
del self[header]
self[header] = ctype
def del_param(self, param, header='content-type', requote=True):
"""Remove the given parameter completely from the Content-Type header.
The header will be re-written in place without the parameter or its
value. All values will be quoted as necessary unless requote is
False. Optional header specifies an alternative to the Content-Type
header.
"""
if header not in self:
return
new_ctype = ''
for p, v in self.get_params(header=header, unquote=requote):
if p.lower() != param.lower():
if not new_ctype:
new_ctype = _formatparam(p, v, requote)
else:
new_ctype = SEMISPACE.join([new_ctype,
_formatparam(p, v, requote)])
if new_ctype != self.get(header):
del self[header]
self[header] = new_ctype
def set_type(self, type, header='Content-Type', requote=True):
"""Set the main type and subtype for the Content-Type header.
type must be a string in the form "maintype/subtype", otherwise a
ValueError is raised.
This method replaces the Content-Type header, keeping all the
parameters in place. If requote is False, this leaves the existing
header's quoting as is. Otherwise, the parameters will be quoted (the
default).
An alternative header can be specified in the header argument. When
the Content-Type header is set, we'll always also add a MIME-Version
header.
"""
# BAW: should we be strict?
if not type.count('/') == 1:
raise ValueError
# Set the Content-Type, you get a MIME-Version
if header.lower() == 'content-type':
del self['mime-version']
self['MIME-Version'] = '1.0'
if header not in self:
self[header] = type
return
params = self.get_params(header=header, unquote=requote)
del self[header]
self[header] = type
# Skip the first param; it's the old type.
for p, v in params[1:]:
self.set_param(p, v, header, requote)
def get_filename(self, failobj=None):
"""Return the filename associated with the payload if present.
The filename is extracted from the Content-Disposition header's
`filename' parameter, and it is unquoted. If that header is missing
the `filename' parameter, this method falls back to looking for the
`name' parameter.
"""
missing = object()
filename = self.get_param('filename', missing, 'content-disposition')
if filename is missing:
filename = self.get_param('name', missing, 'content-type')
if filename is missing:
return failobj
return utils.collapse_rfc2231_value(filename).strip()
def get_boundary(self, failobj=None):
"""Return the boundary associated with the payload if present.
The boundary is extracted from the Content-Type header's `boundary'
parameter, and it is unquoted.
"""
missing = object()
boundary = self.get_param('boundary', missing)
if boundary is missing:
return failobj
# RFC 2046 says that boundaries may begin but not end in w/s
return utils.collapse_rfc2231_value(boundary).rstrip()
def set_boundary(self, boundary):
"""Set the boundary parameter in Content-Type to 'boundary'.
This is subtly different than deleting the Content-Type header and
adding a new one with a new boundary parameter via add_header(). The
main difference is that using the set_boundary() method preserves the
order of the Content-Type header in the original message.
HeaderParseError is raised if the message has no Content-Type header.
"""
missing = object()
params = self._get_params_preserve(missing, 'content-type')
if params is missing:
# There was no Content-Type header, and we don't know what type
# to set it to, so raise an exception.
raise errors.HeaderParseError('No Content-Type header found')
newparams = []
foundp = False
for pk, pv in params:
if pk.lower() == 'boundary':
newparams.append(('boundary', '"%s"' % boundary))
foundp = True
else:
newparams.append((pk, pv))
if not foundp:
# The original Content-Type header had no boundary attribute.
# Tack one on the end. BAW: should we raise an exception
# instead???
newparams.append(('boundary', '"%s"' % boundary))
# Replace the existing Content-Type header with the new value
newheaders = []
for h, v in self._headers:
if h.lower() == 'content-type':
parts = []
for k, v in newparams:
if v == '':
parts.append(k)
else:
parts.append('%s=%s' % (k, v))
val = SEMISPACE.join(parts)
newheaders.append(self.policy.header_store_parse(h, val))
else:
newheaders.append((h, v))
self._headers = newheaders
def get_content_charset(self, failobj=None):
"""Return the charset parameter of the Content-Type header.
The returned string is always coerced to lower case. If there is no
Content-Type header, or if that header has no charset parameter,
failobj is returned.
"""
missing = object()
charset = self.get_param('charset', missing)
if charset is missing:
return failobj
if isinstance(charset, tuple):
# RFC 2231 encoded, so decode it, and it better end up as ascii.
pcharset = charset[0] or 'us-ascii'
try:
# LookupError will be raised if the charset isn't known to
# Python. UnicodeError will be raised if the encoded text
# contains a character not in the charset.
as_bytes = charset[2].encode('raw-unicode-escape')
charset = str(as_bytes, pcharset)
except (LookupError, UnicodeError):
charset = charset[2]
# charset characters must be in us-ascii range
try:
charset.encode('us-ascii')
except UnicodeError:
return failobj
# RFC 2046, $4.1.2 says charsets are not case sensitive
return charset.lower()
def get_charsets(self, failobj=None):
"""Return a list containing the charset(s) used in this message.
The returned list of items describes the Content-Type headers'
charset parameter for this message and all the subparts in its
payload.
Each item will either be a string (the value of the charset parameter
in the Content-Type header of that part) or the value of the
'failobj' parameter (defaults to None), if the part does not have a
main MIME type of "text", or the charset is not defined.
The list will contain one string for each part of the message, plus
one for the container message (i.e. self), so that a non-multipart
message will still return a list of length 1.
"""
return [part.get_content_charset(failobj) for part in self.walk()]
# I.e. def walk(self): ...
from email.iterators import walk

131
upip/email/parser.py Normal file
View File

@ -0,0 +1,131 @@
# Copyright (C) 2001-2007 Python Software Foundation
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
# Contact: email-sig@python.org
"""A parser of RFC 2822 and MIME email messages."""
__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser']
import warnings
from io import StringIO, TextIOWrapper
from email.feedparser import FeedParser, BytesFeedParser
from email.message import Message
from email._policybase import compat32
class Parser:
def __init__(self, _class=Message, policy=compat32):
"""Parser of RFC 2822 and MIME email messages.
Creates an in-memory object tree representing the email message, which
can then be manipulated and turned over to a Generator to return the
textual representation of the message.
The string must be formatted as a block of RFC 2822 headers and header
continuation lines, optionally preceeded by a `Unix-from' header. The
header block is terminated either by the end of the string or by a
blank line.
_class is the class to instantiate for new message objects when they
must be created. This class must have a constructor that can take
zero arguments. Default is Message.Message.
The policy keyword specifies a policy object that controls a number of
aspects of the parser's operation. The default policy maintains
backward compatibility.
"""
self._class = _class
self.policy = policy
def parse(self, fp, headersonly=False):
"""Create a message structure from the data in a file.
Reads all the data from the file and returns the root of the message
structure. Optional headersonly is a flag specifying whether to stop
parsing after reading the headers or not. The default is False,
meaning it parses the entire contents of the file.
"""
feedparser = FeedParser(self._class, policy=self.policy)
if headersonly:
feedparser._set_headersonly()
while True:
data = fp.read(8192)
if not data:
break
feedparser.feed(data)
return feedparser.close()
def parsestr(self, text, headersonly=False):
"""Create a message structure from a string.
Returns the root of the message structure. Optional headersonly is a
flag specifying whether to stop parsing after reading the headers or
not. The default is False, meaning it parses the entire contents of
the file.
"""
return self.parse(StringIO(text), headersonly=headersonly)
class HeaderParser(Parser):
def parse(self, fp, headersonly=True):
return Parser.parse(self, fp, True)
def parsestr(self, text, headersonly=True):
return Parser.parsestr(self, text, True)
class BytesParser:
def __init__(self, *args, **kw):
"""Parser of binary RFC 2822 and MIME email messages.
Creates an in-memory object tree representing the email message, which
can then be manipulated and turned over to a Generator to return the
textual representation of the message.
The input must be formatted as a block of RFC 2822 headers and header
continuation lines, optionally preceeded by a `Unix-from' header. The
header block is terminated either by the end of the input or by a
blank line.
_class is the class to instantiate for new message objects when they
must be created. This class must have a constructor that can take
zero arguments. Default is Message.Message.
"""
self.parser = Parser(*args, **kw)
def parse(self, fp, headersonly=False):
"""Create a message structure from the data in a binary file.
Reads all the data from the file and returns the root of the message
structure. Optional headersonly is a flag specifying whether to stop
parsing after reading the headers or not. The default is False,
meaning it parses the entire contents of the file.
"""
fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
with fp:
return self.parser.parse(fp, headersonly)
def parsebytes(self, text, headersonly=False):
"""Create a message structure from a byte string.
Returns the root of the message structure. Optional headersonly is a
flag specifying whether to stop parsing after reading the headers or
not. The default is False, meaning it parses the entire contents of
the file.
"""
text = text.decode('ASCII', errors='surrogateescape')
return self.parser.parsestr(text, headersonly)
class BytesHeaderParser(BytesParser):
def parse(self, fp, headersonly=True):
return BytesParser.parse(self, fp, headersonly=True)
def parsebytes(self, text, headersonly=True):
return BytesParser.parsebytes(self, text, headersonly=True)

322
upip/email/quoprimime.py Normal file
View File

@ -0,0 +1,322 @@
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Ben Gertzfield
# Contact: email-sig@python.org
"""Quoted-printable content transfer encoding per RFCs 2045-2047.
This module handles the content transfer encoding method defined in RFC 2045
to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to
safely encode text that is in a character set similar to the 7-bit US ASCII
character set, but that includes some 8-bit characters that are normally not
allowed in email bodies or headers.
Quoted-printable is very space-inefficient for encoding binary files; use the
email.base64mime module for that instead.
This module provides an interface to encode and decode both headers and bodies
with quoted-printable encoding.
RFC 2045 defines a method for including character set information in an
`encoded-word' in a header. This method is commonly used for 8-bit real names
in To:/From:/Cc: etc. fields, as well as Subject: lines.
This module does not do the line wrapping or end-of-line character
conversion necessary for proper internationalized headers; it only
does dumb encoding and decoding. To deal with the various line
wrapping issues, use the email.header module.
"""
__all__ = [
'body_decode',
'body_encode',
'body_length',
'decode',
'decodestring',
'header_decode',
'header_encode',
'header_length',
'quote',
'unquote',
]
import re
import io
from string import ascii_letters, digits, hexdigits
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''
# Build a mapping of octets to the expansion of that octet. Since we're only
# going to have 256 of these things, this isn't terribly inefficient
# space-wise. Remember that headers and bodies have different sets of safe
# characters. Initialize both maps with the full expansion, and then override
# the safe bytes with the more compact form.
_QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256))
_QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy()
# Safe header bytes which need no encoding.
for c in b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii'):
_QUOPRI_HEADER_MAP[c] = chr(c)
# Headers have one other special encoding; spaces become underscores.
_QUOPRI_HEADER_MAP[ord(' ')] = '_'
# Safe body bytes which need no encoding.
for c in (b' !"#$%&\'()*+,-./0123456789:;<>'
b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`'
b'abcdefghijklmnopqrstuvwxyz{|}~\t'):
_QUOPRI_BODY_MAP[c] = chr(c)
# Helpers
def header_check(octet):
"""Return True if the octet should be escaped with header quopri."""
return chr(octet) != _QUOPRI_HEADER_MAP[octet]
def body_check(octet):
"""Return True if the octet should be escaped with body quopri."""
return chr(octet) != _QUOPRI_BODY_MAP[octet]
def header_length(bytearray):
"""Return a header quoted-printable encoding length.
Note that this does not include any RFC 2047 chrome added by
`header_encode()`.
:param bytearray: An array of bytes (a.k.a. octets).
:return: The length in bytes of the byte array when it is encoded with
quoted-printable for headers.
"""
return sum(len(_QUOPRI_HEADER_MAP[octet]) for octet in bytearray)
def body_length(bytearray):
"""Return a body quoted-printable encoding length.
:param bytearray: An array of bytes (a.k.a. octets).
:return: The length in bytes of the byte array when it is encoded with
quoted-printable for bodies.
"""
return sum(len(_QUOPRI_BODY_MAP[octet]) for octet in bytearray)
def _max_append(L, s, maxlen, extra=''):
if not isinstance(s, str):
s = chr(s)
if not L:
L.append(s.lstrip())
elif len(L[-1]) + len(s) <= maxlen:
L[-1] += extra + s
else:
L.append(s.lstrip())
def unquote(s):
"""Turn a string in the form =AB to the ASCII character with value 0xab"""
return chr(int(s[1:3], 16))
def quote(c):
return '=%02X' % ord(c)
def header_encode(header_bytes, charset='iso-8859-1'):
"""Encode a single header line with quoted-printable (like) encoding.
Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but
used specifically for email header fields to allow charsets with mostly 7
bit characters (and some 8 bit) to remain more or less readable in non-RFC
2045 aware mail clients.
charset names the character set to use in the RFC 2046 header. It
defaults to iso-8859-1.
"""
# Return empty headers as an empty string.
if not header_bytes:
return ''
# Iterate over every byte, encoding if necessary.
encoded = []
for octet in header_bytes:
encoded.append(_QUOPRI_HEADER_MAP[octet])
# Now add the RFC chrome to each encoded chunk and glue the chunks
# together.
return '=?%s?q?%s?=' % (charset, EMPTYSTRING.join(encoded))
class _body_accumulator(io.StringIO):
def __init__(self, maxlinelen, eol, *args, **kw):
super().__init__(*args, **kw)
self.eol = eol
self.maxlinelen = self.room = maxlinelen
def write_str(self, s):
"""Add string s to the accumulated body."""
self.write(s)
self.room -= len(s)
def newline(self):
"""Write eol, then start new line."""
self.write_str(self.eol)
self.room = self.maxlinelen
def write_soft_break(self):
"""Write a soft break, then start a new line."""
self.write_str('=')
self.newline()
def write_wrapped(self, s, extra_room=0):
"""Add a soft line break if needed, then write s."""
if self.room < len(s) + extra_room:
self.write_soft_break()
self.write_str(s)
def write_char(self, c, is_last_char):
if not is_last_char:
# Another character follows on this line, so we must leave
# extra room, either for it or a soft break, and whitespace
# need not be quoted.
self.write_wrapped(c, extra_room=1)
elif c not in ' \t':
# For this and remaining cases, no more characters follow,
# so there is no need to reserve extra room (since a hard
# break will immediately follow).
self.write_wrapped(c)
elif self.room >= 3:
# It's a whitespace character at end-of-line, and we have room
# for the three-character quoted encoding.
self.write(quote(c))
elif self.room == 2:
# There's room for the whitespace character and a soft break.
self.write(c)
self.write_soft_break()
else:
# There's room only for a soft break. The quoted whitespace
# will be the only content on the subsequent line.
self.write_soft_break()
self.write(quote(c))
def body_encode(body, maxlinelen=76, eol=NL):
"""Encode with quoted-printable, wrapping at maxlinelen characters.
Each line of encoded text will end with eol, which defaults to "\\n". Set
this to "\\r\\n" if you will be using the result of this function directly
in an email.
Each line will be wrapped at, at most, maxlinelen characters before the
eol string (maxlinelen defaults to 76 characters, the maximum value
permitted by RFC 2045). Long lines will have the 'soft line break'
quoted-printable character "=" appended to them, so the decoded text will
be identical to the original text.
The minimum maxlinelen is 4 to have room for a quoted character ("=XX")
followed by a soft line break. Smaller values will generate a
ValueError.
"""
if maxlinelen < 4:
raise ValueError("maxlinelen must be at least 4")
if not body:
return body
# The last line may or may not end in eol, but all other lines do.
last_has_eol = (body[-1] in '\r\n')
# This accumulator will make it easier to build the encoded body.
encoded_body = _body_accumulator(maxlinelen, eol)
lines = body.splitlines()
last_line_no = len(lines) - 1
for line_no, line in enumerate(lines):
last_char_index = len(line) - 1
for i, c in enumerate(line):
if body_check(ord(c)):
c = quote(c)
encoded_body.write_char(c, i==last_char_index)
# Add an eol if input line had eol. All input lines have eol except
# possibly the last one.
if line_no < last_line_no or last_has_eol:
encoded_body.newline()
return encoded_body.getvalue()
# BAW: I'm not sure if the intent was for the signature of this function to be
# the same as base64MIME.decode() or not...
def decode(encoded, eol=NL):
"""Decode a quoted-printable string.
Lines are separated with eol, which defaults to \\n.
"""
if not encoded:
return encoded
# BAW: see comment in encode() above. Again, we're building up the
# decoded string with string concatenation, which could be done much more
# efficiently.
decoded = ''
for line in encoded.splitlines():
line = line.rstrip()
if not line:
decoded += eol
continue
i = 0
n = len(line)
while i < n:
c = line[i]
if c != '=':
decoded += c
i += 1
# Otherwise, c == "=". Are we at the end of the line? If so, add
# a soft line break.
elif i+1 == n:
i += 1
continue
# Decode if in form =AB
elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits:
decoded += unquote(line[i:i+3])
i += 3
# Otherwise, not in form =AB, pass literally
else:
decoded += c
i += 1
if i == n:
decoded += eol
# Special case if original string did not end with eol
if encoded[-1] not in '\r\n' and decoded.endswith(eol):
decoded = decoded[:-1]
return decoded
# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode
def _unquote_match(match):
"""Turn a match in the form =AB to the ASCII character with value 0xab"""
s = match.group(0)
return unquote(s)
# Header decoding is done a bit differently
def header_decode(s):
"""Decode a string encoded with RFC 2045 MIME header `Q' encoding.
This function does not parse a full MIME header value encoded with
quoted-printable (like =?iso-8895-1?q?Hello_World?=) -- please use
the high level email.header class for that functionality.
"""
s = s.replace('_', ' ')
return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s, flags=re.ASCII)

391
upip/email/utils.py Normal file
View File

@ -0,0 +1,391 @@
# Copyright (C) 2001-2010 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org
"""Miscellaneous utilities."""
__all__ = [
'collapse_rfc2231_value',
'decode_params',
'decode_rfc2231',
'encode_rfc2231',
'formataddr',
'formatdate',
'format_datetime',
'getaddresses',
'make_msgid',
'mktime_tz',
'parseaddr',
'parsedate',
'parsedate_tz',
'parsedate_to_datetime',
'unquote',
]
import os
import re
import time
import base64
import random
import socket
import datetime
import urllib.parse
import warnings
from io import StringIO
from email._parseaddr import quote
from email._parseaddr import AddressList as _AddressList
from email._parseaddr import mktime_tz
from email._parseaddr import parsedate, parsedate_tz, _parsedate_tz
from quopri import decodestring as _qdecode
# Intrapackage imports
from email.encoders import _bencode, _qencode
from email.charset import Charset
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = ''
CRLF = '\r\n'
TICK = "'"
specialsre = re.compile(r'[][\\()<>@,:;".]')
escapesre = re.compile(r'[\\"]')
# How to figure out if we are processing strings that come from a byte
# source with undecodable characters.
_has_surrogates = re.compile(
'([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search
# How to deal with a string containing bytes before handing it to the
# application through the 'normal' interface.
def _sanitize(string):
# Turn any escaped bytes into unicode 'unknown' char.
original_bytes = string.encode('ascii', 'surrogateescape')
return original_bytes.decode('ascii', 'replace')
# Helpers
def formataddr(pair, charset='utf-8'):
"""The inverse of parseaddr(), this takes a 2-tuple of the form
(realname, email_address) and returns the string value suitable
for an RFC 2822 From, To or Cc header.
If the first element of pair is false, then the second element is
returned unmodified.
Optional charset if given is the character set that is used to encode
realname in case realname is not ASCII safe. Can be an instance of str or
a Charset-like object which has a header_encode method. Default is
'utf-8'.
"""
name, address = pair
# The address MUST (per RFC) be ascii, so raise an UnicodeError if it isn't.
address.encode('ascii')
if name:
try:
name.encode('ascii')
except UnicodeEncodeError:
if isinstance(charset, str):
charset = Charset(charset)
encoded_name = charset.header_encode(name)
return "%s <%s>" % (encoded_name, address)
else:
quotes = ''
if specialsre.search(name):
quotes = '"'
name = escapesre.sub(r'\\\g<0>', name)
return '%s%s%s <%s>' % (quotes, name, quotes, address)
return address
def getaddresses(fieldvalues):
"""Return a list of (REALNAME, EMAIL) for each fieldvalue."""
all = COMMASPACE.join(fieldvalues)
a = _AddressList(all)
return a.addresslist
ecre = re.compile(r'''
=\? # literal =?
(?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
\? # literal ?
(?P<encoding>[qb]) # either a "q" or a "b", case insensitive
\? # literal ?
(?P<atom>.*?) # non-greedy up to the next ?= is the atom
\?= # literal ?=
''', re.VERBOSE | re.IGNORECASE)
def _format_timetuple_and_zone(timetuple, zone):
return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]],
timetuple[2],
['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][timetuple[1] - 1],
timetuple[0], timetuple[3], timetuple[4], timetuple[5],
zone)
def formatdate(timeval=None, localtime=False, usegmt=False):
"""Returns a date string as specified by RFC 2822, e.g.:
Fri, 09 Nov 2001 01:08:47 -0000
Optional timeval if given is a floating point time value as accepted by
gmtime() and localtime(), otherwise the current time is used.
Optional localtime is a flag that when True, interprets timeval, and
returns a date relative to the local timezone instead of UTC, properly
taking daylight savings time into account.
Optional argument usegmt means that the timezone is written out as
an ascii string, not numeric one (so "GMT" instead of "+0000"). This
is needed for HTTP, and is only used when localtime==False.
"""
# Note: we cannot use strftime() because that honors the locale and RFC
# 2822 requires that day and month names be the English abbreviations.
if timeval is None:
timeval = time.time()
if localtime:
now = time.localtime(timeval)
# Calculate timezone offset, based on whether the local zone has
# daylight savings time, and whether DST is in effect.
if time.daylight and now[-1]:
offset = time.altzone
else:
offset = time.timezone
hours, minutes = divmod(abs(offset), 3600)
# Remember offset is in seconds west of UTC, but the timezone is in
# minutes east of UTC, so the signs differ.
if offset > 0:
sign = '-'
else:
sign = '+'
zone = '%s%02d%02d' % (sign, hours, minutes // 60)
else:
now = time.gmtime(timeval)
# Timezone offset is always -0000
if usegmt:
zone = 'GMT'
else:
zone = '-0000'
return _format_timetuple_and_zone(now, zone)
def format_datetime(dt, usegmt=False):
"""Turn a datetime into a date string as specified in RFC 2822.
If usegmt is True, dt must be an aware datetime with an offset of zero. In
this case 'GMT' will be rendered instead of the normal +0000 required by
RFC2822. This is to support HTTP headers involving date stamps.
"""
now = dt.timetuple()
if usegmt:
if dt.tzinfo is None or dt.tzinfo != datetime.timezone.utc:
raise ValueError("usegmt option requires a UTC datetime")
zone = 'GMT'
elif dt.tzinfo is None:
zone = '-0000'
else:
zone = dt.strftime("%z")
return _format_timetuple_and_zone(now, zone)
def make_msgid(idstring=None, domain=None):
"""Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
<20020201195627.33539.96671@nightshade.la.mastaler.com>
Optional idstring if given is a string used to strengthen the
uniqueness of the message id. Optional domain if given provides the
portion of the message id after the '@'. It defaults to the locally
defined hostname.
"""
timeval = time.time()
utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
pid = os.getpid()
randint = random.randrange(100000)
if idstring is None:
idstring = ''
else:
idstring = '.' + idstring
if domain is None:
domain = socket.getfqdn()
msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, domain)
return msgid
def parsedate_to_datetime(data):
*dtuple, tz = _parsedate_tz(data)
if tz is None:
return datetime.datetime(*dtuple[:6])
return datetime.datetime(*dtuple[:6],
tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
def parseaddr(addr):
addrs = _AddressList(addr).addresslist
if not addrs:
return '', ''
return addrs[0]
# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
"""Remove quotes from a string."""
if len(str) > 1:
if str.startswith('"') and str.endswith('"'):
return str[1:-1].replace('\\\\', '\\').replace('\\"', '"')
if str.startswith('<') and str.endswith('>'):
return str[1:-1]
return str
# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
"""Decode string according to RFC 2231"""
parts = s.split(TICK, 2)
if len(parts) <= 2:
return None, None, s
return parts
def encode_rfc2231(s, charset=None, language=None):
"""Encode string according to RFC 2231.
If neither charset nor language is given, then s is returned as-is. If
charset is given but not language, the string is encoded using the empty
string for language.
"""
s = urllib.parse.quote(s, safe='', encoding=charset or 'ascii')
if charset is None and language is None:
return s
if language is None:
language = ''
return "%s'%s'%s" % (charset, language, s)
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$',
re.ASCII)
def decode_params(params):
"""Decode parameters list according to RFC 2231.
params is a sequence of 2-tuples containing (param name, string value).
"""
# Copy params so we don't mess with the original
params = params[:]
new_params = []
# Map parameter's name to a list of continuations. The values are a
# 3-tuple of the continuation number, the string value, and a flag
# specifying whether a particular segment is %-encoded.
rfc2231_params = {}
name, value = params.pop(0)
new_params.append((name, value))
while params:
name, value = params.pop(0)
if name.endswith('*'):
encoded = True
else:
encoded = False
value = unquote(value)
mo = rfc2231_continuation.match(name)
if mo:
name, num = mo.group('name', 'num')
if num is not None:
num = int(num)
rfc2231_params.setdefault(name, []).append((num, value, encoded))
else:
new_params.append((name, '"%s"' % quote(value)))
if rfc2231_params:
for name, continuations in rfc2231_params.items():
value = []
extended = False
# Sort by number
continuations.sort()
# And now append all values in numerical order, converting
# %-encodings for the encoded segments. If any of the
# continuation names ends in a *, then the entire string, after
# decoding segments and concatenating, must have the charset and
# language specifiers at the beginning of the string.
for num, s, encoded in continuations:
if encoded:
# Decode as "latin-1", so the characters in s directly
# represent the percent-encoded octet values.
# collapse_rfc2231_value treats this as an octet sequence.
s = urllib.parse.unquote(s, encoding="latin-1")
extended = True
value.append(s)
value = quote(EMPTYSTRING.join(value))
if extended:
charset, language, value = decode_rfc2231(value)
new_params.append((name, (charset, language, '"%s"' % value)))
else:
new_params.append((name, '"%s"' % value))
return new_params
def collapse_rfc2231_value(value, errors='replace',
fallback_charset='us-ascii'):
if not isinstance(value, tuple) or len(value) != 3:
return unquote(value)
# While value comes to us as a unicode string, we need it to be a bytes
# object. We do not want bytes() normal utf-8 decoder, we want a straight
# interpretation of the string as character bytes.
charset, language, text = value
rawbytes = bytes(text, 'raw-unicode-escape')
try:
return str(rawbytes, charset, errors)
except LookupError:
# charset is not a known codec.
return unquote(text)
#
# datetime doesn't provide a localtime function yet, so provide one. Code
# adapted from the patch in issue 9527. This may not be perfect, but it is
# better than not having it.
#
def localtime(dt=None, isdst=-1):
"""Return local time as an aware datetime object.
If called without arguments, return current time. Otherwise *dt*
argument should be a datetime instance, and it is converted to the
local time zone according to the system time zone database. If *dt* is
naive (that is, dt.tzinfo is None), it is assumed to be in local time.
In this case, a positive or zero value for *isdst* causes localtime to
presume initially that summer time (for example, Daylight Saving Time)
is or is not (respectively) in effect for the specified time. A
negative value for *isdst* causes the localtime() function to attempt
to divine whether summer time is in effect for the specified time.
"""
if dt is None:
return datetime.datetime.now(datetime.timezone.utc).astimezone()
if dt.tzinfo is not None:
return dt.astimezone()
# We have a naive datetime. Convert to a (localtime) timetuple and pass to
# system mktime together with the isdst hint. System mktime will return
# seconds since epoch.
tm = dt.timetuple()[:-1] + (isdst,)
seconds = time.mktime(tm)
localtm = time.localtime(seconds)
try:
delta = datetime.timedelta(seconds=localtm.tm_gmtoff)
tz = datetime.timezone(delta, localtm.tm_zone)
except AttributeError:
# Compute UTC offset and compare with the value implied by tm_isdst.
# If the values match, use the zone name implied by tm_isdst.
delta = dt - datetime.datetime(*time.gmtime(seconds)[:6])
dst = time.daylight and localtm.tm_isdst > 0
gmtoff = -(time.altzone if dst else time.timezone)
if delta == datetime.timedelta(seconds=gmtoff):
tz = datetime.timezone(delta, time.tzname[dst])
else:
tz = datetime.timezone(delta)
return dt.replace(tzinfo=tz)

38
upip/errno.py Normal file
View File

@ -0,0 +1,38 @@
EPERM = 1 # Operation not permitted
ENOENT = 2 # No such file or directory
ESRCH = 3 # No such process
EINTR = 4 # Interrupted system call
EIO = 5 # I/O error
ENXIO = 6 # No such device or address
E2BIG = 7 # Argument list too long
ENOEXEC = 8 # Exec format error
EBADF = 9 # Bad file number
ECHILD = 10 # No child processes
EAGAIN = 11 # Try again
ENOMEM = 12 # Out of memory
EACCES = 13 # Permission denied
EFAULT = 14 # Bad address
ENOTBLK = 15 # Block device required
EBUSY = 16 # Device or resource busy
EEXIST = 17 # File exists
EXDEV = 18 # Cross-device link
ENODEV = 19 # No such device
ENOTDIR = 20 # Not a directory
EISDIR = 21 # Is a directory
EINVAL = 22 # Invalid argument
ENFILE = 23 # File table overflow
EMFILE = 24 # Too many open files
ENOTTY = 25 # Not a typewriter
ETXTBSY = 26 # Text file busy
EFBIG = 27 # File too large
ENOSPC = 28 # No space left on device
ESPIPE = 29 # Illegal seek
EROFS = 30 # Read-only file system
EMLINK = 31 # Too many links
EPIPE = 32 # Broken pipe
EDOM = 33 # Math argument out of domain of func
ERANGE = 34 # Math result not representable
EAFNOSUPPORT = 97 # Address family not supported by protocol
ECONNRESET = 104 # Connection timed out
ETIMEDOUT = 110 # Connection timed out
EINPROGRESS = 115 # Operation now in progress

13
upip/example-extract.py Normal file
View File

@ -0,0 +1,13 @@
import sys
import os
import shutil
import utarfile
t = utarfile.TarFile(sys.argv[1])
for i in t:
print(i)
if i.type == utarfile.DIRTYPE:
os.makedirs(i.name)
else:
f = t.extractfile(i)
shutil.copyfileobj(f, open(i.name, "wb"))

28
upip/example.py Normal file
View File

@ -0,0 +1,28 @@
#
# uaiohttpclient - fetch URL passed as command line argument.
#
import uasyncio as asyncio
import uaiohttpclient as aiohttp
def print_stream(resp):
print((yield from resp.read()))
return
while True:
line = yield from reader.readline()
if not line:
break
print(line.rstrip())
def run(url):
resp = yield from aiohttp.request("GET", url)
print(resp)
yield from print_stream(resp)
import sys
import logging
logging.basicConfig(level=logging.INFO)
url = sys.argv[1]
loop = asyncio.get_event_loop()
loop.run_until_complete(run(url))
loop.close()

37
upip/fcntl.py Normal file
View File

@ -0,0 +1,37 @@
import ffi
import os
import ffilib
libc = ffilib.libc()
fcntl_l = libc.func("i", "fcntl", "iil")
fcntl_s = libc.func("i", "fcntl", "iip")
ioctl_l = libc.func("i", "ioctl", "iil")
ioctl_s = libc.func("i", "ioctl", "iip")
def fcntl(fd, op, arg=0):
if type(arg) is int:
r = fcntl_l(fd, op, arg)
os.check_error(r)
return r
else:
r = fcntl_s(fd, op, arg)
os.check_error(r)
# TODO: Not compliant. CPython says that arg should be immutable,
# and possibly mutated buffer is returned.
return r
def ioctl(fd, op, arg=0, mut=False):
if type(arg) is int:
r = ioctl_l(fd, op, arg)
os.check_error(r)
return r
else:
# TODO
assert mut
r = ioctl_s(fd, op, arg)
os.check_error(r)
return r

46
upip/ffilib.py Normal file
View File

@ -0,0 +1,46 @@
import sys
try:
import ffi
except ImportError:
ffi = None
_cache = {}
def open(name, maxver=10, extra=()):
if not ffi:
return None
try:
return _cache[name]
except KeyError:
pass
def libs():
if sys.platform == "linux":
yield '%s.so' % name
for i in range(maxver, -1, -1):
yield '%s.so.%u' % (name, i)
else:
for ext in ('dylib', 'dll'):
yield '%s.%s' % (name, ext)
for n in extra:
yield n
err = None
for n in libs():
try:
l = ffi.open(n)
_cache[name] = l
return l
except OSError as e:
err = e
raise err
def libc():
return open("libc", 6)
# Find out bitness of the platform, even if long ints are not supported
# TODO: All bitness differences should be removed from micropython-lib, and
# this snippet too.
bitness = 1
v = sys.maxsize
while v:
bitness += 1
v >>= 1

111
upip/fnmatch.py Normal file
View File

@ -0,0 +1,111 @@
"""Filename matching with shell patterns.
fnmatch(FILENAME, PATTERN) matches according to the local convention.
fnmatchcase(FILENAME, PATTERN) always takes case in account.
The functions operate by translating the pattern into a regular
expression. They cache the compiled regular expressions for speed.
The function translate(PATTERN) returns a regular expression
corresponding to PATTERN. (It does not compile it.)
"""
import os
import os.path
import posixpath
import re
#import functools
__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
def fnmatch(name, pat):
"""Test whether FILENAME matches PATTERN.
Patterns are Unix shell style:
* matches everything
? matches any single character
[seq] matches any character in seq
[!seq] matches any char not in seq
An initial period in FILENAME is not special.
Both FILENAME and PATTERN are first case-normalized
if the operating system requires it.
If you don't want this, use fnmatchcase(FILENAME, PATTERN).
"""
name = os.path.normcase(name)
pat = os.path.normcase(pat)
return fnmatchcase(name, pat)
#@functools.lru_cache(maxsize=256, typed=True)
def _compile_pattern(pat):
if isinstance(pat, bytes):
pat_str = str(pat, 'ISO-8859-1')
res_str = translate(pat_str)
res = bytes(res_str, 'ISO-8859-1')
else:
res = translate(pat)
return re.compile(res).match
def filter(names, pat):
"""Return the subset of the list NAMES that match PAT."""
result = []
pat = os.path.normcase(pat)
match = _compile_pattern(pat)
if os.path is posixpath:
# normcase on posix is NOP. Optimize it away from the loop.
for name in names:
if match(name):
result.append(name)
else:
for name in names:
if match(os.path.normcase(name)):
result.append(name)
return result
def fnmatchcase(name, pat):
"""Test whether FILENAME matches PATTERN, including case.
This is a version of fnmatch() which doesn't case-normalize
its arguments.
"""
match = _compile_pattern(pat)
return match(name) is not None
def translate(pat):
"""Translate a shell PATTERN to a regular expression.
There is no way to quote meta-characters.
"""
i, n = 0, len(pat)
res = ''
while i < n:
c = pat[i]
i = i+1
if c == '*':
res = res + '.*'
elif c == '?':
res = res + '.'
elif c == '[':
j = i
if j < n and pat[j] == '!':
j = j+1
if j < n and pat[j] == ']':
j = j+1
while j < n and pat[j] != ']':
j = j+1
if j >= n:
res = res + '\\['
else:
stuff = pat[i:j].replace('\\','\\\\')
i = j+1
if stuff[0] == '!':
stuff = '^' + stuff[1:]
elif stuff[0] == '^':
stuff = '\\' + stuff
res = '%s[%s]' % (res, stuff)
else:
res = res + re.escape(c)
# Original patterns is undefined, see http://bugs.python.org/issue21464
return '(?ms)' + res + '\Z'

27
upip/functools.py Normal file
View File

@ -0,0 +1,27 @@
def partial(func, *args, **kwargs):
def _partial(*more_args, **more_kwargs):
kw = kwargs.copy()
kw.update(more_kwargs)
return func(*(args + more_args), **kw)
return _partial
def update_wrapper(wrapper, wrapped, assigned=None, updated=None):
# Dummy impl
return wrapper
def wraps(wrapped, assigned=None, updated=None):
# Dummy impl
return lambda x: x
def reduce(function, iterable, initializer=None):
it = iter(iterable)
if initializer is None:
value = next(it)
else:
value = initializer
for element in it:
value = function(value, element)
return value

215
upip/getopt.py Normal file
View File

@ -0,0 +1,215 @@
"""Parser for command line options.
This module helps scripts to parse the command line arguments in
sys.argv. It supports the same conventions as the Unix getopt()
function (including the special meanings of arguments of the form `-'
and `--'). Long options similar to those supported by GNU software
may be used as well via an optional third argument. This module
provides two functions and an exception:
getopt() -- Parse command line options
gnu_getopt() -- Like getopt(), but allow option and non-option arguments
to be intermixed.
GetoptError -- exception (class) raised with 'opt' attribute, which is the
option involved with the exception.
"""
# Long option support added by Lars Wirzenius <liw@iki.fi>.
#
# Gerrit Holl <gerrit@nl.linux.org> moved the string-based exceptions
# to class-based exceptions.
#
# Peter Åstrand <astrand@lysator.liu.se> added gnu_getopt().
#
# TODO for gnu_getopt():
#
# - GNU getopt_long_only mechanism
# - allow the caller to specify ordering
# - RETURN_IN_ORDER option
# - GNU extension with '-' as first character of option string
# - optional arguments, specified by double colons
# - a option string with a W followed by semicolon should
# treat "-W foo" as "--foo"
__all__ = ["GetoptError","error","getopt","gnu_getopt"]
import os
try:
from gettext import gettext as _
except ImportError:
# Bootstrapping Python: gettext's dependencies not built yet
def _(s): return s
class GetoptError(Exception):
opt = ''
msg = ''
def __init__(self, msg, opt=''):
self.msg = msg
self.opt = opt
Exception.__init__(self, msg, opt)
def __str__(self):
return self.msg
error = GetoptError # backward compatibility
def getopt(args, shortopts, longopts = []):
"""getopt(args, options[, long_options]) -> opts, args
Parses command line options and parameter list. args is the
argument list to be parsed, without the leading reference to the
running program. Typically, this means "sys.argv[1:]". shortopts
is the string of option letters that the script wants to
recognize, with options that require an argument followed by a
colon (i.e., the same format that Unix getopt() uses). If
specified, longopts is a list of strings with the names of the
long options which should be supported. The leading '--'
characters should not be included in the option name. Options
which require an argument should be followed by an equal sign
('=').
The return value consists of two elements: the first is a list of
(option, value) pairs; the second is the list of program arguments
left after the option list was stripped (this is a trailing slice
of the first argument). Each option-and-value pair returned has
the option as its first element, prefixed with a hyphen (e.g.,
'-x'), and the option argument as its second element, or an empty
string if the option has no argument. The options occur in the
list in the same order in which they were found, thus allowing
multiple occurrences. Long and short options may be mixed.
"""
opts = []
if type(longopts) == type(""):
longopts = [longopts]
else:
longopts = list(longopts)
while args and args[0].startswith('-') and args[0] != '-':
if args[0] == '--':
args = args[1:]
break
if args[0].startswith('--'):
opts, args = do_longs(opts, args[0][2:], longopts, args[1:])
else:
opts, args = do_shorts(opts, args[0][1:], shortopts, args[1:])
return opts, args
def gnu_getopt(args, shortopts, longopts = []):
"""getopt(args, options[, long_options]) -> opts, args
This function works like getopt(), except that GNU style scanning
mode is used by default. This means that option and non-option
arguments may be intermixed. The getopt() function stops
processing options as soon as a non-option argument is
encountered.
If the first character of the option string is `+', or if the
environment variable POSIXLY_CORRECT is set, then option
processing stops as soon as a non-option argument is encountered.
"""
opts = []
prog_args = []
if isinstance(longopts, str):
longopts = [longopts]
else:
longopts = list(longopts)
# Allow options after non-option arguments?
if shortopts.startswith('+'):
shortopts = shortopts[1:]
all_options_first = True
elif os.environ.get("POSIXLY_CORRECT"):
all_options_first = True
else:
all_options_first = False
while args:
if args[0] == '--':
prog_args += args[1:]
break
if args[0][:2] == '--':
opts, args = do_longs(opts, args[0][2:], longopts, args[1:])
elif args[0][:1] == '-' and args[0] != '-':
opts, args = do_shorts(opts, args[0][1:], shortopts, args[1:])
else:
if all_options_first:
prog_args += args
break
else:
prog_args.append(args[0])
args = args[1:]
return opts, prog_args
def do_longs(opts, opt, longopts, args):
try:
i = opt.index('=')
except ValueError:
optarg = None
else:
opt, optarg = opt[:i], opt[i+1:]
has_arg, opt = long_has_args(opt, longopts)
if has_arg:
if optarg is None:
if not args:
raise GetoptError(_('option --%s requires argument') % opt, opt)
optarg, args = args[0], args[1:]
elif optarg is not None:
raise GetoptError(_('option --%s must not have an argument') % opt, opt)
opts.append(('--' + opt, optarg or ''))
return opts, args
# Return:
# has_arg?
# full option name
def long_has_args(opt, longopts):
possibilities = [o for o in longopts if o.startswith(opt)]
if not possibilities:
raise GetoptError(_('option --%s not recognized') % opt, opt)
# Is there an exact match?
if opt in possibilities:
return False, opt
elif opt + '=' in possibilities:
return True, opt
# No exact match, so better be unique.
if len(possibilities) > 1:
# XXX since possibilities contains all valid continuations, might be
# nice to work them into the error msg
raise GetoptError(_('option --%s not a unique prefix') % opt, opt)
assert len(possibilities) == 1
unique_match = possibilities[0]
has_arg = unique_match.endswith('=')
if has_arg:
unique_match = unique_match[:-1]
return has_arg, unique_match
def do_shorts(opts, optstring, shortopts, args):
while optstring != '':
opt, optstring = optstring[0], optstring[1:]
if short_has_arg(opt, shortopts):
if optstring == '':
if not args:
raise GetoptError(_('option -%s requires argument') % opt,
opt)
optstring, args = args[0], args[1:]
optarg, optstring = optstring, ''
else:
optarg = ''
opts.append(('-' + opt, optarg))
return opts, args
def short_has_arg(opt, shortopts):
for i in range(len(shortopts)):
if opt == shortopts[i] != ':':
return shortopts.startswith(':', i+1)
raise GetoptError(_('option -%s not recognized') % opt, opt)
if __name__ == '__main__':
import sys
print(getopt(sys.argv[1:], "a:b", ["alpha=", "beta"]))

14
upip/gettext.py Normal file
View File

@ -0,0 +1,14 @@
import ffilib
libc = ffilib.libc()
gettext_ = libc.func("s", "gettext", "s")
ngettext_ = libc.func("s", "ngettext", "ssL")
def gettext(message):
return gettext_(message)
def ngettext(singular, plural, n):
return ngettext_(singular, plural, n)

95
upip/glob.py Normal file
View File

@ -0,0 +1,95 @@
"""Filename globbing utility."""
import os
import os.path
import re
import fnmatch
__all__ = ["glob", "iglob"]
def glob(pathname):
"""Return a list of paths matching a pathname pattern.
The pattern may contain simple shell-style wildcards a la
fnmatch. However, unlike fnmatch, filenames starting with a
dot are special cases that are not matched by '*' and '?'
patterns.
"""
return list(iglob(pathname))
def iglob(pathname):
"""Return an iterator which yields the paths matching a pathname pattern.
The pattern may contain simple shell-style wildcards a la
fnmatch. However, unlike fnmatch, filenames starting with a
dot are special cases that are not matched by '*' and '?'
patterns.
"""
if not has_magic(pathname):
if os.path.lexists(pathname):
yield pathname
return
dirname, basename = os.path.split(pathname)
if not dirname:
for name in glob1(None, basename):
yield name
return
# `os.path.split()` returns the argument itself as a dirname if it is a
# drive or UNC path. Prevent an infinite recursion if a drive or UNC path
# contains magic characters (i.e. r'\\?\C:').
if dirname != pathname and has_magic(dirname):
dirs = iglob(dirname)
else:
dirs = [dirname]
if has_magic(basename):
glob_in_dir = glob1
else:
glob_in_dir = glob0
for dirname in dirs:
for name in glob_in_dir(dirname, basename):
yield os.path.join(dirname, name)
# These 2 helper functions non-recursively glob inside a literal directory.
# They return a list of basenames. `glob1` accepts a pattern while `glob0`
# takes a literal basename (so it only has to check for its existence).
def glob1(dirname, pattern):
if not dirname:
if isinstance(pattern, bytes):
dirname = bytes(os.curdir, 'ASCII')
else:
dirname = os.curdir
try:
names = os.listdir(dirname)
except os.error:
return []
if not _ishidden(pattern):
names = [x for x in names if not _ishidden(x)]
return fnmatch.filter(names, pattern)
def glob0(dirname, basename):
if not basename:
# `os.path.split()` returns an empty basename for paths ending with a
# directory separator. 'q*x/' should match only directories.
if os.path.isdir(dirname):
return [basename]
else:
if os.path.lexists(os.path.join(dirname, basename)):
return [basename]
return []
magic_check = re.compile('[*?[]')
magic_check_bytes = re.compile(b'[*?[]')
def has_magic(s):
if isinstance(s, bytes):
match = magic_check_bytes.search(s)
else:
match = magic_check.search(s)
return match is not None
def _ishidden(path):
return path[0] in ('.', b'.'[0])

28
upip/gzip.py Normal file
View File

@ -0,0 +1,28 @@
#import zlib
import uzlib as zlib
FTEXT = 1
FHCRC = 2
FEXTRA = 4
FNAME = 8
FCOMMENT = 16
def decompress(data):
assert data[0] == 0x1f and data[1] == 0x8b
assert data[2] == 8
flg = data[3]
assert flg & 0xe0 == 0
i = 10
if flg & FEXTRA:
i += data[11] << 8 + data[10] + 2
if flg & FNAME:
while data[i]:
i += 1
i += 1
if flg & FCOMMENT:
while data[i]:
i += 1
i += 1
if flg & FHCRC:
i += 2
return zlib.decompress(memoryview(data)[i:], -15)

22
upip/hashlib/__init__.py Normal file
View File

@ -0,0 +1,22 @@
try:
import uhashlib
except ImportError:
uhashlib = None
def init():
for i in ("sha1", "sha224", "sha256", "sha384", "sha512"):
c = getattr(uhashlib, i, None)
if not c:
c = __import__("_" + i, None, None, (), 1)
c = getattr(c, i)
globals()[i] = c
init()
def new(algo, data=b""):
try:
c = globals()[algo]
return c(data)
except KeyError:
raise ValueError(algo)

1
upip/hashlib/_sha224.py Normal file
View File

@ -0,0 +1 @@
from ._sha256 import sha224

264
upip/hashlib/_sha256.py Normal file
View File

@ -0,0 +1,264 @@
SHA_BLOCKSIZE = 64
SHA_DIGESTSIZE = 32
def new_shaobject():
return {
'digest': [0]*8,
'count_lo': 0,
'count_hi': 0,
'data': [0]* SHA_BLOCKSIZE,
'local': 0,
'digestsize': 0
}
ROR = lambda x, y: (((x & 0xffffffff) >> (y & 31)) | (x << (32 - (y & 31)))) & 0xffffffff
Ch = lambda x, y, z: (z ^ (x & (y ^ z)))
Maj = lambda x, y, z: (((x | y) & z) | (x & y))
S = lambda x, n: ROR(x, n)
R = lambda x, n: (x & 0xffffffff) >> n
Sigma0 = lambda x: (S(x, 2) ^ S(x, 13) ^ S(x, 22))
Sigma1 = lambda x: (S(x, 6) ^ S(x, 11) ^ S(x, 25))
Gamma0 = lambda x: (S(x, 7) ^ S(x, 18) ^ R(x, 3))
Gamma1 = lambda x: (S(x, 17) ^ S(x, 19) ^ R(x, 10))
def sha_transform(sha_info):
W = []
d = sha_info['data']
for i in range(0,16):
W.append( (d[4*i]<<24) + (d[4*i+1]<<16) + (d[4*i+2]<<8) + d[4*i+3])
for i in range(16,64):
W.append( (Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16]) & 0xffffffff )
ss = sha_info['digest'][:]
def RND(a,b,c,d,e,f,g,h,i,ki):
t0 = h + Sigma1(e) + Ch(e, f, g) + ki + W[i];
t1 = Sigma0(a) + Maj(a, b, c);
d += t0;
h = t0 + t1;
return d & 0xffffffff, h & 0xffffffff
ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],0,0x428a2f98);
ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],1,0x71374491);
ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],2,0xb5c0fbcf);
ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],3,0xe9b5dba5);
ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],4,0x3956c25b);
ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],5,0x59f111f1);
ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],6,0x923f82a4);
ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],7,0xab1c5ed5);
ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],8,0xd807aa98);
ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],9,0x12835b01);
ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],10,0x243185be);
ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],11,0x550c7dc3);
ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],12,0x72be5d74);
ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],13,0x80deb1fe);
ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],14,0x9bdc06a7);
ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],15,0xc19bf174);
ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],16,0xe49b69c1);
ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],17,0xefbe4786);
ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],18,0x0fc19dc6);
ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],19,0x240ca1cc);
ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],20,0x2de92c6f);
ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],21,0x4a7484aa);
ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],22,0x5cb0a9dc);
ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],23,0x76f988da);
ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],24,0x983e5152);
ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],25,0xa831c66d);
ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],26,0xb00327c8);
ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],27,0xbf597fc7);
ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],28,0xc6e00bf3);
ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],29,0xd5a79147);
ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],30,0x06ca6351);
ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],31,0x14292967);
ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],32,0x27b70a85);
ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],33,0x2e1b2138);
ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],34,0x4d2c6dfc);
ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],35,0x53380d13);
ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],36,0x650a7354);
ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],37,0x766a0abb);
ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],38,0x81c2c92e);
ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],39,0x92722c85);
ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],40,0xa2bfe8a1);
ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],41,0xa81a664b);
ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],42,0xc24b8b70);
ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],43,0xc76c51a3);
ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],44,0xd192e819);
ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],45,0xd6990624);
ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],46,0xf40e3585);
ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],47,0x106aa070);
ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],48,0x19a4c116);
ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],49,0x1e376c08);
ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],50,0x2748774c);
ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],51,0x34b0bcb5);
ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],52,0x391c0cb3);
ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],53,0x4ed8aa4a);
ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],54,0x5b9cca4f);
ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],55,0x682e6ff3);
ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],56,0x748f82ee);
ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],57,0x78a5636f);
ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],58,0x84c87814);
ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],59,0x8cc70208);
ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],60,0x90befffa);
ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],61,0xa4506ceb);
ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],62,0xbef9a3f7);
ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],63,0xc67178f2);
dig = []
for i, x in enumerate(sha_info['digest']):
dig.append( (x + ss[i]) & 0xffffffff )
sha_info['digest'] = dig
def sha_init():
sha_info = new_shaobject()
sha_info['digest'] = [0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19]
sha_info['count_lo'] = 0
sha_info['count_hi'] = 0
sha_info['local'] = 0
sha_info['digestsize'] = 32
return sha_info
def sha224_init():
sha_info = new_shaobject()
sha_info['digest'] = [0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4]
sha_info['count_lo'] = 0
sha_info['count_hi'] = 0
sha_info['local'] = 0
sha_info['digestsize'] = 28
return sha_info
def getbuf(s):
if isinstance(s, str):
return s.encode('ascii')
else:
return bytes(s)
def sha_update(sha_info, buffer):
if isinstance(buffer, str):
raise TypeError("Unicode strings must be encoded before hashing")
count = len(buffer)
buffer_idx = 0
clo = (sha_info['count_lo'] + (count << 3)) & 0xffffffff
if clo < sha_info['count_lo']:
sha_info['count_hi'] += 1
sha_info['count_lo'] = clo
sha_info['count_hi'] += (count >> 29)
if sha_info['local']:
i = SHA_BLOCKSIZE - sha_info['local']
if i > count:
i = count
# copy buffer
for x in enumerate(buffer[buffer_idx:buffer_idx+i]):
sha_info['data'][sha_info['local']+x[0]] = x[1]
count -= i
buffer_idx += i
sha_info['local'] += i
if sha_info['local'] == SHA_BLOCKSIZE:
sha_transform(sha_info)
sha_info['local'] = 0
else:
return
while count >= SHA_BLOCKSIZE:
# copy buffer
sha_info['data'] = list(buffer[buffer_idx:buffer_idx + SHA_BLOCKSIZE])
count -= SHA_BLOCKSIZE
buffer_idx += SHA_BLOCKSIZE
sha_transform(sha_info)
# copy buffer
pos = sha_info['local']
sha_info['data'][pos:pos+count] = list(buffer[buffer_idx:buffer_idx + count])
sha_info['local'] = count
def sha_final(sha_info):
lo_bit_count = sha_info['count_lo']
hi_bit_count = sha_info['count_hi']
count = (lo_bit_count >> 3) & 0x3f
sha_info['data'][count] = 0x80;
count += 1
if count > SHA_BLOCKSIZE - 8:
# zero the bytes in data after the count
sha_info['data'] = sha_info['data'][:count] + ([0] * (SHA_BLOCKSIZE - count))
sha_transform(sha_info)
# zero bytes in data
sha_info['data'] = [0] * SHA_BLOCKSIZE
else:
sha_info['data'] = sha_info['data'][:count] + ([0] * (SHA_BLOCKSIZE - count))
sha_info['data'][56] = (hi_bit_count >> 24) & 0xff
sha_info['data'][57] = (hi_bit_count >> 16) & 0xff
sha_info['data'][58] = (hi_bit_count >> 8) & 0xff
sha_info['data'][59] = (hi_bit_count >> 0) & 0xff
sha_info['data'][60] = (lo_bit_count >> 24) & 0xff
sha_info['data'][61] = (lo_bit_count >> 16) & 0xff
sha_info['data'][62] = (lo_bit_count >> 8) & 0xff
sha_info['data'][63] = (lo_bit_count >> 0) & 0xff
sha_transform(sha_info)
dig = []
for i in sha_info['digest']:
dig.extend([ ((i>>24) & 0xff), ((i>>16) & 0xff), ((i>>8) & 0xff), (i & 0xff) ])
return bytes(dig)
class sha256(object):
digest_size = digestsize = SHA_DIGESTSIZE
block_size = SHA_BLOCKSIZE
def __init__(self, s=None):
self._sha = sha_init()
if s:
sha_update(self._sha, getbuf(s))
def update(self, s):
sha_update(self._sha, getbuf(s))
def digest(self):
return sha_final(self._sha.copy())[:self._sha['digestsize']]
def hexdigest(self):
return ''.join(['%.2x' % i for i in self.digest()])
def copy(self):
new = sha256()
new._sha = self._sha.copy()
return new
class sha224(sha256):
digest_size = digestsize = 28
def __init__(self, s=None):
self._sha = sha224_init()
if s:
sha_update(self._sha, getbuf(s))
def copy(self):
new = sha224()
new._sha = self._sha.copy()
return new
def test():
a_str = "just a test string"
assert b"\xe3\xb0\xc4B\x98\xfc\x1c\x14\x9a\xfb\xf4\xc8\x99o\xb9$'\xaeA\xe4d\x9b\x93L\xa4\x95\x99\x1bxR\xb8U" == sha256().digest()
assert 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' == sha256().hexdigest()
assert 'd7b553c6f09ac85d142415f857c5310f3bbbe7cdd787cce4b985acedd585266f' == sha256(a_str).hexdigest()
assert '8113ebf33c97daa9998762aacafe750c7cefc2b2f173c90c59663a57fe626f21' == sha256(a_str*7).hexdigest()
s = sha256(a_str)
s.update(a_str)
assert '03d9963e05a094593190b6fc794cb1a3e1ac7d7883f0b5855268afeccc70d461' == s.hexdigest()
if __name__ == "__main__":
test()

1
upip/hashlib/_sha384.py Normal file
View File

@ -0,0 +1 @@
from ._sha512 import sha384

290
upip/hashlib/_sha512.py Normal file
View File

@ -0,0 +1,290 @@
"""
This code was Ported from CPython's sha512module.c
"""
SHA_BLOCKSIZE = 128
SHA_DIGESTSIZE = 64
def new_shaobject():
return {
'digest': [0]*8,
'count_lo': 0,
'count_hi': 0,
'data': [0]* SHA_BLOCKSIZE,
'local': 0,
'digestsize': 0
}
ROR64 = lambda x, y: (((x & 0xffffffffffffffff) >> (y & 63)) | (x << (64 - (y & 63)))) & 0xffffffffffffffff
Ch = lambda x, y, z: (z ^ (x & (y ^ z)))
Maj = lambda x, y, z: (((x | y) & z) | (x & y))
S = lambda x, n: ROR64(x, n)
R = lambda x, n: (x & 0xffffffffffffffff) >> n
Sigma0 = lambda x: (S(x, 28) ^ S(x, 34) ^ S(x, 39))
Sigma1 = lambda x: (S(x, 14) ^ S(x, 18) ^ S(x, 41))
Gamma0 = lambda x: (S(x, 1) ^ S(x, 8) ^ R(x, 7))
Gamma1 = lambda x: (S(x, 19) ^ S(x, 61) ^ R(x, 6))
def sha_transform(sha_info):
W = []
d = sha_info['data']
for i in range(0,16):
W.append( (d[8*i]<<56) + (d[8*i+1]<<48) + (d[8*i+2]<<40) + (d[8*i+3]<<32) + (d[8*i+4]<<24) + (d[8*i+5]<<16) + (d[8*i+6]<<8) + d[8*i+7])
for i in range(16,80):
W.append( (Gamma1(W[i - 2]) + W[i - 7] + Gamma0(W[i - 15]) + W[i - 16]) & 0xffffffffffffffff )
ss = sha_info['digest'][:]
def RND(a,b,c,d,e,f,g,h,i,ki):
t0 = (h + Sigma1(e) + Ch(e, f, g) + ki + W[i]) & 0xffffffffffffffff
t1 = (Sigma0(a) + Maj(a, b, c)) & 0xffffffffffffffff
d = (d + t0) & 0xffffffffffffffff
h = (t0 + t1) & 0xffffffffffffffff
return d & 0xffffffffffffffff, h & 0xffffffffffffffff
ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],0,0x428a2f98d728ae22)
ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],1,0x7137449123ef65cd)
ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],2,0xb5c0fbcfec4d3b2f)
ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],3,0xe9b5dba58189dbbc)
ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],4,0x3956c25bf348b538)
ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],5,0x59f111f1b605d019)
ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],6,0x923f82a4af194f9b)
ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],7,0xab1c5ed5da6d8118)
ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],8,0xd807aa98a3030242)
ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],9,0x12835b0145706fbe)
ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],10,0x243185be4ee4b28c)
ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],11,0x550c7dc3d5ffb4e2)
ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],12,0x72be5d74f27b896f)
ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],13,0x80deb1fe3b1696b1)
ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],14,0x9bdc06a725c71235)
ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],15,0xc19bf174cf692694)
ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],16,0xe49b69c19ef14ad2)
ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],17,0xefbe4786384f25e3)
ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],18,0x0fc19dc68b8cd5b5)
ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],19,0x240ca1cc77ac9c65)
ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],20,0x2de92c6f592b0275)
ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],21,0x4a7484aa6ea6e483)
ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],22,0x5cb0a9dcbd41fbd4)
ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],23,0x76f988da831153b5)
ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],24,0x983e5152ee66dfab)
ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],25,0xa831c66d2db43210)
ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],26,0xb00327c898fb213f)
ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],27,0xbf597fc7beef0ee4)
ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],28,0xc6e00bf33da88fc2)
ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],29,0xd5a79147930aa725)
ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],30,0x06ca6351e003826f)
ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],31,0x142929670a0e6e70)
ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],32,0x27b70a8546d22ffc)
ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],33,0x2e1b21385c26c926)
ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],34,0x4d2c6dfc5ac42aed)
ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],35,0x53380d139d95b3df)
ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],36,0x650a73548baf63de)
ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],37,0x766a0abb3c77b2a8)
ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],38,0x81c2c92e47edaee6)
ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],39,0x92722c851482353b)
ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],40,0xa2bfe8a14cf10364)
ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],41,0xa81a664bbc423001)
ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],42,0xc24b8b70d0f89791)
ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],43,0xc76c51a30654be30)
ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],44,0xd192e819d6ef5218)
ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],45,0xd69906245565a910)
ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],46,0xf40e35855771202a)
ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],47,0x106aa07032bbd1b8)
ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],48,0x19a4c116b8d2d0c8)
ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],49,0x1e376c085141ab53)
ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],50,0x2748774cdf8eeb99)
ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],51,0x34b0bcb5e19b48a8)
ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],52,0x391c0cb3c5c95a63)
ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],53,0x4ed8aa4ae3418acb)
ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],54,0x5b9cca4f7763e373)
ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],55,0x682e6ff3d6b2b8a3)
ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],56,0x748f82ee5defb2fc)
ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],57,0x78a5636f43172f60)
ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],58,0x84c87814a1f0ab72)
ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],59,0x8cc702081a6439ec)
ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],60,0x90befffa23631e28)
ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],61,0xa4506cebde82bde9)
ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],62,0xbef9a3f7b2c67915)
ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],63,0xc67178f2e372532b)
ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],64,0xca273eceea26619c)
ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],65,0xd186b8c721c0c207)
ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],66,0xeada7dd6cde0eb1e)
ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],67,0xf57d4f7fee6ed178)
ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],68,0x06f067aa72176fba)
ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],69,0x0a637dc5a2c898a6)
ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],70,0x113f9804bef90dae)
ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],71,0x1b710b35131c471b)
ss[3], ss[7] = RND(ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],72,0x28db77f523047d84)
ss[2], ss[6] = RND(ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],73,0x32caab7b40c72493)
ss[1], ss[5] = RND(ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],74,0x3c9ebe0a15c9bebc)
ss[0], ss[4] = RND(ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],ss[4],75,0x431d67c49c100d4c)
ss[7], ss[3] = RND(ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],ss[3],76,0x4cc5d4becb3e42b6)
ss[6], ss[2] = RND(ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],ss[2],77,0x597f299cfc657e2a)
ss[5], ss[1] = RND(ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],ss[1],78,0x5fcb6fab3ad6faec)
ss[4], ss[0] = RND(ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[0],79,0x6c44198c4a475817)
dig = []
for i, x in enumerate(sha_info['digest']):
dig.append( (x + ss[i]) & 0xffffffffffffffff )
sha_info['digest'] = dig
def sha_init():
sha_info = new_shaobject()
sha_info['digest'] = [ 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, 0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179]
sha_info['count_lo'] = 0
sha_info['count_hi'] = 0
sha_info['local'] = 0
sha_info['digestsize'] = 64
return sha_info
def sha384_init():
sha_info = new_shaobject()
sha_info['digest'] = [ 0xcbbb9d5dc1059ed8, 0x629a292a367cd507, 0x9159015a3070dd17, 0x152fecd8f70e5939, 0x67332667ffc00b31, 0x8eb44a8768581511, 0xdb0c2e0d64f98fa7, 0x47b5481dbefa4fa4]
sha_info['count_lo'] = 0
sha_info['count_hi'] = 0
sha_info['local'] = 0
sha_info['digestsize'] = 48
return sha_info
def getbuf(s):
if isinstance(s, str):
return s.encode('ascii')
else:
return bytes(s)
def sha_update(sha_info, buffer):
if isinstance(buffer, str):
raise TypeError("Unicode strings must be encoded before hashing")
count = len(buffer)
buffer_idx = 0
clo = (sha_info['count_lo'] + (count << 3)) & 0xffffffff
if clo < sha_info['count_lo']:
sha_info['count_hi'] += 1
sha_info['count_lo'] = clo
sha_info['count_hi'] += (count >> 29)
if sha_info['local']:
i = SHA_BLOCKSIZE - sha_info['local']
if i > count:
i = count
# copy buffer
for x in enumerate(buffer[buffer_idx:buffer_idx+i]):
sha_info['data'][sha_info['local']+x[0]] = x[1]
count -= i
buffer_idx += i
sha_info['local'] += i
if sha_info['local'] == SHA_BLOCKSIZE:
sha_transform(sha_info)
sha_info['local'] = 0
else:
return
while count >= SHA_BLOCKSIZE:
# copy buffer
sha_info['data'] = list(buffer[buffer_idx:buffer_idx + SHA_BLOCKSIZE])
count -= SHA_BLOCKSIZE
buffer_idx += SHA_BLOCKSIZE
sha_transform(sha_info)
# copy buffer
pos = sha_info['local']
sha_info['data'][pos:pos+count] = list(buffer[buffer_idx:buffer_idx + count])
sha_info['local'] = count
def sha_final(sha_info):
lo_bit_count = sha_info['count_lo']
hi_bit_count = sha_info['count_hi']
count = (lo_bit_count >> 3) & 0x7f
sha_info['data'][count] = 0x80;
count += 1
if count > SHA_BLOCKSIZE - 16:
# zero the bytes in data after the count
sha_info['data'] = sha_info['data'][:count] + ([0] * (SHA_BLOCKSIZE - count))
sha_transform(sha_info)
# zero bytes in data
sha_info['data'] = [0] * SHA_BLOCKSIZE
else:
sha_info['data'] = sha_info['data'][:count] + ([0] * (SHA_BLOCKSIZE - count))
sha_info['data'][112] = 0;
sha_info['data'][113] = 0;
sha_info['data'][114] = 0;
sha_info['data'][115] = 0;
sha_info['data'][116] = 0;
sha_info['data'][117] = 0;
sha_info['data'][118] = 0;
sha_info['data'][119] = 0;
sha_info['data'][120] = (hi_bit_count >> 24) & 0xff
sha_info['data'][121] = (hi_bit_count >> 16) & 0xff
sha_info['data'][122] = (hi_bit_count >> 8) & 0xff
sha_info['data'][123] = (hi_bit_count >> 0) & 0xff
sha_info['data'][124] = (lo_bit_count >> 24) & 0xff
sha_info['data'][125] = (lo_bit_count >> 16) & 0xff
sha_info['data'][126] = (lo_bit_count >> 8) & 0xff
sha_info['data'][127] = (lo_bit_count >> 0) & 0xff
sha_transform(sha_info)
dig = []
for i in sha_info['digest']:
dig.extend([ ((i>>56) & 0xff), ((i>>48) & 0xff), ((i>>40) & 0xff), ((i>>32) & 0xff), ((i>>24) & 0xff), ((i>>16) & 0xff), ((i>>8) & 0xff), (i & 0xff) ])
return bytes(dig)
class sha512(object):
digest_size = digestsize = SHA_DIGESTSIZE
block_size = SHA_BLOCKSIZE
def __init__(self, s=None):
self._sha = sha_init()
if s:
sha_update(self._sha, getbuf(s))
def update(self, s):
sha_update(self._sha, getbuf(s))
def digest(self):
return sha_final(self._sha.copy())[:self._sha['digestsize']]
def hexdigest(self):
return ''.join(['%.2x' % i for i in self.digest()])
def copy(self):
new = sha512()
new._sha = self._sha.copy()
return new
class sha384(sha512):
digest_size = digestsize = 48
def __init__(self, s=None):
self._sha = sha384_init()
if s:
sha_update(self._sha, getbuf(s))
def copy(self):
new = sha384()
new._sha = self._sha.copy()
return new
def test():
a_str = "just a test string"
assert sha512().digest() == b"\xcf\x83\xe15~\xef\xb8\xbd\xf1T(P\xd6m\x80\x07\xd6 \xe4\x05\x0bW\x15\xdc\x83\xf4\xa9!\xd3l\xe9\xceG\xd0\xd1<]\x85\xf2\xb0\xff\x83\x18\xd2\x87~\xec/c\xb91\xbdGAz\x81\xa582z\xf9'\xda>"
assert sha512().hexdigest() == 'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e'
assert sha512(a_str).hexdigest() == '68be4c6664af867dd1d01c8d77e963d87d77b702400c8fabae355a41b8927a5a5533a7f1c28509bbd65c5f3ac716f33be271fbda0ca018b71a84708c9fae8a53'
assert sha512(a_str*7).hexdigest() == '3233acdbfcfff9bff9fc72401d31dbffa62bd24e9ec846f0578d647da73258d9f0879f7fde01fe2cc6516af3f343807fdef79e23d696c923d79931db46bf1819'
s = sha512(a_str)
s.update(a_str)
assert s.hexdigest() == '341aeb668730bbb48127d5531115f3c39d12cb9586a6ca770898398aff2411087cfe0b570689adf328cddeb1f00803acce6737a19f310b53bbdb0320828f75bb'
if __name__ == "__main__":
test()

480
upip/heapq.py Normal file
View File

@ -0,0 +1,480 @@
"""Heap queue algorithm (a.k.a. priority queue).
Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for
all k, counting elements from 0. For the sake of comparison,
non-existing elements are considered to be infinite. The interesting
property of a heap is that a[0] is always its smallest element.
Usage:
heap = [] # creates an empty heap
heappush(heap, item) # pushes a new item on the heap
item = heappop(heap) # pops the smallest item from the heap
item = heap[0] # smallest item on the heap without popping it
heapify(x) # transforms list into a heap, in-place, in linear time
item = heapreplace(heap, item) # pops and returns smallest item, and adds
# new item; the heap size is unchanged
Our API differs from textbook heap algorithms as follows:
- We use 0-based indexing. This makes the relationship between the
index for a node and the indexes for its children slightly less
obvious, but is more suitable since Python uses 0-based indexing.
- Our heappop() method returns the smallest item, not the largest.
These two make it possible to view the heap as a regular Python list
without surprises: heap[0] is the smallest item, and heap.sort()
maintains the heap invariant!
"""
# Original code by Kevin O'Connor, augmented by Tim Peters and Raymond Hettinger
"""Heap queues
[explanation by François Pinard]
Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for
all k, counting elements from 0. For the sake of comparison,
non-existing elements are considered to be infinite. The interesting
property of a heap is that a[0] is always its smallest element.
The strange invariant above is meant to be an efficient memory
representation for a tournament. The numbers below are `k', not a[k]:
0
1 2
3 4 5 6
7 8 9 10 11 12 13 14
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In
an usual binary tournament we see in sports, each cell is the winner
over the two cells it tops, and we can trace the winner down the tree
to see all opponents s/he had. However, in many computer applications
of such tournaments, we do not need to trace the history of a winner.
To be more memory efficient, when a winner is promoted, we try to
replace it by something else at a lower level, and the rule becomes
that a cell and the two cells it tops contain three different items,
but the top cell "wins" over the two topped cells.
If this heap invariant is protected at all time, index 0 is clearly
the overall winner. The simplest algorithmic way to remove it and
find the "next" winner is to move some loser (let's say cell 30 in the
diagram above) into the 0 position, and then percolate this new 0 down
the tree, exchanging values, until the invariant is re-established.
This is clearly logarithmic on the total number of items in the tree.
By iterating over all items, you get an O(n ln n) sort.
A nice feature of this sort is that you can efficiently insert new
items while the sort is going on, provided that the inserted items are
not "better" than the last 0'th element you extracted. This is
especially useful in simulation contexts, where the tree holds all
incoming events, and the "win" condition means the smallest scheduled
time. When an event schedule other events for execution, they are
scheduled into the future, so they can easily go into the heap. So, a
heap is a good structure for implementing schedulers (this is what I
used for my MIDI sequencer :-).
Various structures for implementing schedulers have been extensively
studied, and heaps are good for this, as they are reasonably speedy,
the speed is almost constant, and the worst case is not much different
than the average case. However, there are other representations which
are more efficient overall, yet the worst cases might be terrible.
Heaps are also very useful in big disk sorts. You most probably all
know that a big sort implies producing "runs" (which are pre-sorted
sequences, which size is usually related to the amount of CPU memory),
followed by a merging passes for these runs, which merging is often
very cleverly organised[1]. It is very important that the initial
sort produces the longest runs possible. Tournaments are a good way
to that. If, using all the memory available to hold a tournament, you
replace and percolate items that happen to fit the current run, you'll
produce runs which are twice the size of the memory for random input,
and much better for input fuzzily ordered.
Moreover, if you output the 0'th item on disk and get an input which
may not fit in the current tournament (because the value "wins" over
the last output value), it cannot fit in the heap, so the size of the
heap decreases. The freed memory could be cleverly reused immediately
for progressively building a second heap, which grows at exactly the
same rate the first heap is melting. When the first heap completely
vanishes, you switch heaps and start a new run. Clever and quite
effective!
In a word, heaps are useful memory structures to know. I use them in
a few applications, and I think it is good to keep a `heap' module
around. :-)
--------------------
[1] The disk balancing algorithms which are current, nowadays, are
more annoying than clever, and this is a consequence of the seeking
capabilities of the disks. On devices which cannot seek, like big
tape drives, the story was quite different, and one had to be very
clever to ensure (far in advance) that each tape movement will be the
most effective possible (that is, will best participate at
"progressing" the merge). Some tapes were even able to read
backwards, and this was also used to avoid the rewinding time.
Believe me, real good tape sorts were quite spectacular to watch!
From all times, sorting has always been a Great Art! :-)
"""
__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge',
'nlargest', 'nsmallest', 'heappushpop']
#from itertools import count, tee, chain
def heappush(heap, item):
"""Push item onto heap, maintaining the heap invariant."""
heap.append(item)
_siftdown(heap, 0, len(heap)-1)
def heappop(heap):
"""Pop the smallest item off the heap, maintaining the heap invariant."""
lastelt = heap.pop() # raises appropriate IndexError if heap is empty
if heap:
returnitem = heap[0]
heap[0] = lastelt
_siftup(heap, 0)
else:
returnitem = lastelt
return returnitem
def heapreplace(heap, item):
"""Pop and return the current smallest value, and add the new item.
This is more efficient than heappop() followed by heappush(), and can be
more appropriate when using a fixed-size heap. Note that the value
returned may be larger than item! That constrains reasonable uses of
this routine unless written as part of a conditional replacement:
if item > heap[0]:
item = heapreplace(heap, item)
"""
returnitem = heap[0] # raises appropriate IndexError if heap is empty
heap[0] = item
_siftup(heap, 0)
return returnitem
def heappushpop(heap, item):
"""Fast version of a heappush followed by a heappop."""
if heap and heap[0] < item:
item, heap[0] = heap[0], item
_siftup(heap, 0)
return item
def heapify(x):
"""Transform list into a heap, in-place, in O(len(x)) time."""
n = len(x)
# Transform bottom-up. The largest index there's any point to looking at
# is the largest with a child index in-range, so must have 2*i + 1 < n,
# or i < (n-1)/2. If n is even = 2*j, this is (2*j-1)/2 = j-1/2 so
# j-1 is the largest, which is n//2 - 1. If n is odd = 2*j+1, this is
# (2*j+1-1)/2 = j so j-1 is the largest, and that's again n//2-1.
for i in reversed(range(n//2)):
_siftup(x, i)
def _heappushpop_max(heap, item):
"""Maxheap version of a heappush followed by a heappop."""
if heap and item < heap[0]:
item, heap[0] = heap[0], item
_siftup_max(heap, 0)
return item
def _heapify_max(x):
"""Transform list into a maxheap, in-place, in O(len(x)) time."""
n = len(x)
for i in reversed(range(n//2)):
_siftup_max(x, i)
def nlargest(n, iterable):
"""Find the n largest elements in a dataset.
Equivalent to: sorted(iterable, reverse=True)[:n]
"""
from itertools import islice, count, tee, chain
if n < 0:
return []
it = iter(iterable)
result = list(islice(it, n))
if not result:
return result
heapify(result)
_heappushpop = heappushpop
for elem in it:
_heappushpop(result, elem)
result.sort(reverse=True)
return result
def nsmallest(n, iterable):
"""Find the n smallest elements in a dataset.
Equivalent to: sorted(iterable)[:n]
"""
from itertools import islice, count, tee, chain
if n < 0:
return []
it = iter(iterable)
result = list(islice(it, n))
if not result:
return result
_heapify_max(result)
_heappushpop = _heappushpop_max
for elem in it:
_heappushpop(result, elem)
result.sort()
return result
# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos
# is the index of a leaf with a possibly out-of-order value. Restore the
# heap invariant.
def _siftdown(heap, startpos, pos):
newitem = heap[pos]
# Follow the path to the root, moving parents down until finding a place
# newitem fits.
while pos > startpos:
parentpos = (pos - 1) >> 1
parent = heap[parentpos]
if newitem < parent:
heap[pos] = parent
pos = parentpos
continue
break
heap[pos] = newitem
# The child indices of heap index pos are already heaps, and we want to make
# a heap at index pos too. We do this by bubbling the smaller child of
# pos up (and so on with that child's children, etc) until hitting a leaf,
# then using _siftdown to move the oddball originally at index pos into place.
#
# We *could* break out of the loop as soon as we find a pos where newitem <=
# both its children, but turns out that's not a good idea, and despite that
# many books write the algorithm that way. During a heap pop, the last array
# element is sifted in, and that tends to be large, so that comparing it
# against values starting from the root usually doesn't pay (= usually doesn't
# get us out of the loop early). See Knuth, Volume 3, where this is
# explained and quantified in an exercise.
#
# Cutting the # of comparisons is important, since these routines have no
# way to extract "the priority" from an array element, so that intelligence
# is likely to be hiding in custom comparison methods, or in array elements
# storing (priority, record) tuples. Comparisons are thus potentially
# expensive.
#
# On random arrays of length 1000, making this change cut the number of
# comparisons made by heapify() a little, and those made by exhaustive
# heappop() a lot, in accord with theory. Here are typical results from 3
# runs (3 just to demonstrate how small the variance is):
#
# Compares needed by heapify Compares needed by 1000 heappops
# -------------------------- --------------------------------
# 1837 cut to 1663 14996 cut to 8680
# 1855 cut to 1659 14966 cut to 8678
# 1847 cut to 1660 15024 cut to 8703
#
# Building the heap by using heappush() 1000 times instead required
# 2198, 2148, and 2219 compares: heapify() is more efficient, when
# you can use it.
#
# The total compares needed by list.sort() on the same lists were 8627,
# 8627, and 8632 (this should be compared to the sum of heapify() and
# heappop() compares): list.sort() is (unsurprisingly!) more efficient
# for sorting.
def _siftup(heap, pos):
endpos = len(heap)
startpos = pos
newitem = heap[pos]
# Bubble up the smaller child until hitting a leaf.
childpos = 2*pos + 1 # leftmost child position
while childpos < endpos:
# Set childpos to index of smaller child.
rightpos = childpos + 1
if rightpos < endpos and not heap[childpos] < heap[rightpos]:
childpos = rightpos
# Move the smaller child up.
heap[pos] = heap[childpos]
pos = childpos
childpos = 2*pos + 1
# The leaf at pos is empty now. Put newitem there, and bubble it up
# to its final resting place (by sifting its parents down).
heap[pos] = newitem
_siftdown(heap, startpos, pos)
def _siftdown_max(heap, startpos, pos):
'Maxheap variant of _siftdown'
newitem = heap[pos]
# Follow the path to the root, moving parents down until finding a place
# newitem fits.
while pos > startpos:
parentpos = (pos - 1) >> 1
parent = heap[parentpos]
if parent < newitem:
heap[pos] = parent
pos = parentpos
continue
break
heap[pos] = newitem
def _siftup_max(heap, pos):
'Maxheap variant of _siftup'
endpos = len(heap)
startpos = pos
newitem = heap[pos]
# Bubble up the larger child until hitting a leaf.
childpos = 2*pos + 1 # leftmost child position
while childpos < endpos:
# Set childpos to index of larger child.
rightpos = childpos + 1
if rightpos < endpos and not heap[rightpos] < heap[childpos]:
childpos = rightpos
# Move the larger child up.
heap[pos] = heap[childpos]
pos = childpos
childpos = 2*pos + 1
# The leaf at pos is empty now. Put newitem there, and bubble it up
# to its final resting place (by sifting its parents down).
heap[pos] = newitem
_siftdown_max(heap, startpos, pos)
# If available, use C implementation
try:
from _heapq import *
except ImportError:
pass
def merge(*iterables):
'''Merge multiple sorted inputs into a single sorted output.
Similar to sorted(itertools.chain(*iterables)) but returns a generator,
does not pull the data into memory all at once, and assumes that each of
the input streams is already sorted (smallest to largest).
>>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]))
[0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]
'''
_heappop, _heapreplace, _StopIteration = heappop, heapreplace, StopIteration
_len = len
h = []
h_append = h.append
for itnum, it in enumerate(map(iter, iterables)):
try:
next = it.__next__
h_append([next(), itnum, next])
except _StopIteration:
pass
heapify(h)
while _len(h) > 1:
try:
while True:
v, itnum, next = s = h[0]
yield v
s[0] = next() # raises StopIteration when exhausted
_heapreplace(h, s) # restore heap condition
except _StopIteration:
_heappop(h) # remove empty iterator
if h:
# fast case when only a single iterator remains
v, itnum, next = h[0]
yield v
yield from next.__self__
# Extend the implementations of nsmallest and nlargest to use a key= argument
_nsmallest = nsmallest
def nsmallest(n, iterable, key=None):
"""Find the n smallest elements in a dataset.
Equivalent to: sorted(iterable, key=key)[:n]
"""
from itertools import islice, count, tee, chain
# Short-cut for n==1 is to use min() when len(iterable)>0
if n == 1:
it = iter(iterable)
head = list(islice(it, 1))
if not head:
return []
if key is None:
return [min(chain(head, it))]
return [min(chain(head, it), key=key)]
# When n>=size, it's faster to use sorted()
try:
size = len(iterable)
except (TypeError, AttributeError):
pass
else:
if n >= size:
return sorted(iterable, key=key)[:n]
# When key is none, use simpler decoration
if key is None:
it = zip(iterable, count()) # decorate
result = _nsmallest(n, it)
return [r[0] for r in result] # undecorate
# General case, slowest method
in1, in2 = tee(iterable)
it = zip(map(key, in1), count(), in2) # decorate
result = _nsmallest(n, it)
return [r[2] for r in result] # undecorate
_nlargest = nlargest
def nlargest(n, iterable, key=None):
"""Find the n largest elements in a dataset.
Equivalent to: sorted(iterable, key=key, reverse=True)[:n]
"""
from itertools import islice, count, tee, chain
# Short-cut for n==1 is to use max() when len(iterable)>0
if n == 1:
it = iter(iterable)
head = list(islice(it, 1))
if not head:
return []
if key is None:
return [max(chain(head, it))]
return [max(chain(head, it), key=key)]
# When n>=size, it's faster to use sorted()
try:
size = len(iterable)
except (TypeError, AttributeError):
pass
else:
if n >= size:
return sorted(iterable, key=key, reverse=True)[:n]
# When key is none, use simpler decoration
if key is None:
it = zip(iterable, count(0,-1)) # decorate
result = _nlargest(n, it)
return [r[0] for r in result] # undecorate
# General case, slowest method
in1, in2 = tee(iterable)
it = zip(map(key, in1), count(0,-1), in2) # decorate
result = _nlargest(n, it)
return [r[2] for r in result] # undecorate
if __name__ == "__main__":
# Simple sanity test
heap = []
data = [1, 3, 5, 7, 9, 2, 4, 6, 8, 0]
for item in data:
heappush(heap, item)
sort = []
while heap:
sort.append(heappop(heap))
print(sort)
import doctest
doctest.testmod()

149
upip/hmac.py Normal file
View File

@ -0,0 +1,149 @@
"""HMAC (Keyed-Hashing for Message Authentication) Python module.
Implements the HMAC algorithm as described by RFC 2104.
"""
import warnings as _warnings
#from _operator import _compare_digest as compare_digest
import hashlib as _hashlib
PendingDeprecationWarning = None
RuntimeWarning = None
trans_5C = bytes((x ^ 0x5C) for x in range(256))
trans_36 = bytes((x ^ 0x36) for x in range(256))
def translate(d, t):
return bytes(t[x] for x in d)
# The size of the digests returned by HMAC depends on the underlying
# hashing module used. Use digest_size from the instance of HMAC instead.
digest_size = None
class HMAC:
"""RFC 2104 HMAC class. Also complies with RFC 4231.
This supports the API for Cryptographic Hash Functions (PEP 247).
"""
blocksize = 64 # 512-bit HMAC; can be changed in subclasses.
def __init__(self, key, msg = None, digestmod = None):
"""Create a new HMAC object.
key: key for the keyed hash object.
msg: Initial input for the hash, if provided.
digestmod: A module supporting PEP 247. *OR*
A hashlib constructor returning a new hash object. *OR*
A hash name suitable for hashlib.new().
Defaults to hashlib.md5.
Implicit default to hashlib.md5 is deprecated and will be
removed in Python 3.6.
Note: key and msg must be a bytes or bytearray objects.
"""
if not isinstance(key, (bytes, bytearray)):
raise TypeError("key: expected bytes or bytearray, but got %r" % type(key).__name__)
if digestmod is None:
_warnings.warn("HMAC() without an explicit digestmod argument "
"is deprecated.", PendingDeprecationWarning, 2)
digestmod = _hashlib.md5
if callable(digestmod):
self.digest_cons = digestmod
elif isinstance(digestmod, str):
self.digest_cons = lambda d=b'': _hashlib.new(digestmod, d)
else:
self.digest_cons = lambda d=b'': digestmod.new(d)
self.outer = self.digest_cons()
self.inner = self.digest_cons()
self.digest_size = self.inner.digest_size
if hasattr(self.inner, 'block_size'):
blocksize = self.inner.block_size
if blocksize < 16:
_warnings.warn('block_size of %d seems too small; using our '
'default of %d.' % (blocksize, self.blocksize),
RuntimeWarning, 2)
blocksize = self.blocksize
else:
_warnings.warn('No block_size attribute on given digest object; '
'Assuming %d.' % (self.blocksize),
RuntimeWarning, 2)
blocksize = self.blocksize
# self.blocksize is the default blocksize. self.block_size is
# effective block size as well as the public API attribute.
self.block_size = blocksize
if len(key) > blocksize:
key = self.digest_cons(key).digest()
key = key + bytes(blocksize - len(key))
self.outer.update(translate(key, trans_5C))
self.inner.update(translate(key, trans_36))
if msg is not None:
self.update(msg)
@property
def name(self):
return "hmac-" + self.inner.name
def update(self, msg):
"""Update this hashing object with the string msg.
"""
self.inner.update(msg)
def copy(self):
"""Return a separate copy of this hashing object.
An update to this copy won't affect the original object.
"""
# Call __new__ directly to avoid the expensive __init__.
other = self.__class__.__new__(self.__class__)
other.digest_cons = self.digest_cons
other.digest_size = self.digest_size
other.inner = self.inner.copy()
other.outer = self.outer.copy()
return other
def _current(self):
"""Return a hash object for the current state.
To be used only internally with digest() and hexdigest().
"""
h = self.outer.copy()
h.update(self.inner.digest())
return h
def digest(self):
"""Return the hash value of this hashing object.
This returns a string containing 8-bit data. The object is
not altered in any way by this function; you can continue
updating the object after calling this function.
"""
h = self._current()
return h.digest()
def hexdigest(self):
"""Like digest(), but returns a string of hexadecimal digits instead.
"""
h = self._current()
return h.hexdigest()
def new(key, msg = None, digestmod = None):
"""Create a new hashing object and return it.
key: The starting key for the hash.
msg: if available, will immediately be hashed into the object's starting
state.
You can now feed arbitrary strings into the object using its update()
method, and can ask for the hash value at any time by calling its digest()
method.
"""
return HMAC(key, msg, digestmod)

22
upip/html/__init__.py Normal file
View File

@ -0,0 +1,22 @@
"""
General functions for HTML manipulation.
"""
_escape_map = {ord('&'): '&amp;', ord('<'): '&lt;', ord('>'): '&gt;'}
_escape_map_full = {ord('&'): '&amp;', ord('<'): '&lt;', ord('>'): '&gt;',
ord('"'): '&quot;', ord('\''): '&#x27;'}
# NB: this is a candidate for a bytes/string polymorphic interface
def escape(s, quote=True):
"""
Replace special characters "&", "<" and ">" to HTML-safe sequences.
If the optional flag quote is true (the default), the quotation mark
characters, both double quote (") and single quote (') characters are also
translated.
"""
import string
if quote:
return string.translate(s, _escape_map_full)
return string.translate(s, _escape_map)

2506
upip/html/entities.py Normal file

File diff suppressed because it is too large Load Diff

532
upip/html/parser.py Normal file
View File

@ -0,0 +1,532 @@
"""A parser for HTML and XHTML."""
# This file is based on sgmllib.py, but the API is slightly different.
# XXX There should be a way to distinguish between PCDATA (parsed
# character data -- the normal case), RCDATA (replaceable character
# data -- only char and entity references and end tags are special)
# and CDATA (character data -- only end tags are special).
import _markupbase
import re
import warnings
# Regular expressions used for parsing
interesting_normal = re.compile('[&<]')
incomplete = re.compile('&[a-zA-Z#]')
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]')
starttagopen = re.compile('<[a-zA-Z]')
piclose = re.compile('>')
commentclose = re.compile(r'--\s*>')
tagfind = re.compile('([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*')
# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state
# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state
tagfind_tolerant = re.compile('[a-zA-Z][^\t\n\r\f />\\x00]*')
# Note:
# 1) the strict attrfind isn't really strict, but we can't make it
# correctly strict without breaking backward compatibility;
# 2) if you change attrfind remember to update locatestarttagend too;
# 3) if you change attrfind and/or locatestarttagend the parser will
# explode, so don't do it.
attrfind = re.compile(
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
attrfind_tolerant = re.compile(
r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*'
r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*')
locatestarttagend = re.compile(r"""
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
(?:\s+ # whitespace before attribute name
(?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
(?:\s*=\s* # value indicator
(?:'[^']*' # LITA-enclosed value
|\"[^\"]*\" # LIT-enclosed value
|[^'\">\s]+ # bare value
)
)?
)
)*
\s* # trailing whitespace
""", re.VERBOSE)
locatestarttagend_tolerant = re.compile(r"""
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
(?:[\s/]* # optional whitespace before attribute name
(?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name
(?:\s*=+\s* # value indicator
(?:'[^']*' # LITA-enclosed value
|"[^"]*" # LIT-enclosed value
|(?!['"])[^>\s]* # bare value
)
(?:\s*,)* # possibly followed by a comma
)?(?:\s|/(?!>))*
)*
)?
\s* # trailing whitespace
""", re.VERBOSE)
endendtag = re.compile('>')
# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between
# </ and the tag name, so maybe this should be fixed
endtagfind = re.compile('</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
class HTMLParseError(Exception):
"""Exception raised for all parse errors."""
def __init__(self, msg, position=(None, None)):
assert msg
self.msg = msg
self.lineno = position[0]
self.offset = position[1]
def __str__(self):
result = self.msg
if self.lineno is not None:
result = result + ", at line %d" % self.lineno
if self.offset is not None:
result = result + ", column %d" % (self.offset + 1)
return result
class HTMLParser(_markupbase.ParserBase):
"""Find tags and other markup and call handler functions.
Usage:
p = HTMLParser()
p.feed(data)
...
p.close()
Start tags are handled by calling self.handle_starttag() or
self.handle_startendtag(); end tags by self.handle_endtag(). The
data between tags is passed from the parser to the derived class
by calling self.handle_data() with the data as argument (the data
may be split up in arbitrary chunks). Entity references are
passed by calling self.handle_entityref() with the entity
reference as the argument. Numeric character references are
passed to self.handle_charref() with the string containing the
reference as the argument.
"""
CDATA_CONTENT_ELEMENTS = ("script", "style")
def __init__(self, strict=False):
"""Initialize and reset this instance.
If strict is set to False (the default) the parser will parse invalid
markup, otherwise it will raise an error. Note that the strict mode
is deprecated.
"""
if strict:
warnings.warn("The strict mode is deprecated.",
DeprecationWarning, stacklevel=2)
self.strict = strict
self.reset()
def reset(self):
"""Reset this instance. Loses all unprocessed data."""
self.rawdata = ''
self.lasttag = '???'
self.interesting = interesting_normal
self.cdata_elem = None
_markupbase.ParserBase.reset(self)
def feed(self, data):
r"""Feed data to the parser.
Call this as often as you want, with as little or as much text
as you want (may include '\n').
"""
self.rawdata = self.rawdata + data
self.goahead(0)
def close(self):
"""Handle any buffered data."""
self.goahead(1)
def error(self, message):
raise HTMLParseError(message, self.getpos())
__starttag_text = None
def get_starttag_text(self):
"""Return full source of start tag: '<...>'."""
return self.__starttag_text
def set_cdata_mode(self, elem):
self.cdata_elem = elem.lower()
self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
def clear_cdata_mode(self):
self.interesting = interesting_normal
self.cdata_elem = None
# Internal -- handle data as far as reasonable. May leave state
# and data to be processed by a subsequent call. If 'end' is
# true, force handling all data as if followed by EOF marker.
def goahead(self, end):
rawdata = self.rawdata
i = 0
n = len(rawdata)
while i < n:
match = self.interesting.search(rawdata, i) # < or &
if match:
j = match.start()
else:
if self.cdata_elem:
break
j = n
if i < j: self.handle_data(rawdata[i:j])
i = self.updatepos(i, j)
if i == n: break
startswith = rawdata.startswith
if startswith('<', i):
if starttagopen.match(rawdata, i): # < + letter
k = self.parse_starttag(i)
elif startswith("</", i):
k = self.parse_endtag(i)
elif startswith("<!--", i):
k = self.parse_comment(i)
elif startswith("<?", i):
k = self.parse_pi(i)
elif startswith("<!", i):
if self.strict:
k = self.parse_declaration(i)
else:
k = self.parse_html_declaration(i)
elif (i + 1) < n:
self.handle_data("<")
k = i + 1
else:
break
if k < 0:
if not end:
break
if self.strict:
self.error("EOF in middle of construct")
k = rawdata.find('>', i + 1)
if k < 0:
k = rawdata.find('<', i + 1)
if k < 0:
k = i + 1
else:
k += 1
self.handle_data(rawdata[i:k])
i = self.updatepos(i, k)
elif startswith("&#", i):
match = charref.match(rawdata, i)
if match:
name = match.group()[2:-1]
self.handle_charref(name)
k = match.end()
if not startswith(';', k-1):
k = k - 1
i = self.updatepos(i, k)
continue
else:
if ";" in rawdata[i:]: #bail by consuming &#
self.handle_data(rawdata[0:2])
i = self.updatepos(i, 2)
break
elif startswith('&', i):
match = entityref.match(rawdata, i)
if match:
name = match.group(1)
self.handle_entityref(name)
k = match.end()
if not startswith(';', k-1):
k = k - 1
i = self.updatepos(i, k)
continue
match = incomplete.match(rawdata, i)
if match:
# match.group() will contain at least 2 chars
if end and match.group() == rawdata[i:]:
if self.strict:
self.error("EOF in middle of entity or char ref")
else:
k = match.end()
if k <= i:
k = n
i = self.updatepos(i, i + 1)
# incomplete
break
elif (i + 1) < n:
# not the end of the buffer, and can't be confused
# with some other construct
self.handle_data("&")
i = self.updatepos(i, i + 1)
else:
break
else:
assert 0, "interesting.search() lied"
# end while
if end and i < n and not self.cdata_elem:
self.handle_data(rawdata[i:n])
i = self.updatepos(i, n)
self.rawdata = rawdata[i:]
# Internal -- parse html declarations, return length or -1 if not terminated
# See w3.org/TR/html5/tokenization.html#markup-declaration-open-state
# See also parse_declaration in _markupbase
def parse_html_declaration(self, i):
rawdata = self.rawdata
assert rawdata[i:i+2] == '<!', ('unexpected call to '
'parse_html_declaration()')
if rawdata[i:i+4] == '<!--':
# this case is actually already handled in goahead()
return self.parse_comment(i)
elif rawdata[i:i+3] == '<![':
return self.parse_marked_section(i)
elif rawdata[i:i+9].lower() == '<!doctype':
# find the closing >
gtpos = rawdata.find('>', i+9)
if gtpos == -1:
return -1
self.handle_decl(rawdata[i+2:gtpos])
return gtpos+1
else:
return self.parse_bogus_comment(i)
# Internal -- parse bogus comment, return length or -1 if not terminated
# see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state
def parse_bogus_comment(self, i, report=1):
rawdata = self.rawdata
assert rawdata[i:i+2] in ('<!', '</'), ('unexpected call to '
'parse_comment()')
pos = rawdata.find('>', i+2)
if pos == -1:
return -1
if report:
self.handle_comment(rawdata[i+2:pos])
return pos + 1
# Internal -- parse processing instr, return end or -1 if not terminated
def parse_pi(self, i):
rawdata = self.rawdata
assert rawdata[i:i+2] == '<?', 'unexpected call to parse_pi()'
match = piclose.search(rawdata, i+2) # >
if not match:
return -1
j = match.start()
self.handle_pi(rawdata[i+2: j])
j = match.end()
return j
# Internal -- handle starttag, return end or -1 if not terminated
def parse_starttag(self, i):
self.__starttag_text = None
endpos = self.check_for_whole_start_tag(i)
if endpos < 0:
return endpos
rawdata = self.rawdata
self.__starttag_text = rawdata[i:endpos]
# Now parse the data between i+1 and j into a tag and attrs
attrs = []
match = tagfind.match(rawdata, i+1)
assert match, 'unexpected call to parse_starttag()'
k = match.end()
self.lasttag = tag = match.group(1).lower()
while k < endpos:
if self.strict:
m = attrfind.match(rawdata, k)
else:
m = attrfind_tolerant.match(rawdata, k)
if not m:
break
attrname, rest, attrvalue = m.group(1, 2, 3)
if not rest:
attrvalue = None
elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
attrvalue[:1] == '"' == attrvalue[-1:]:
attrvalue = attrvalue[1:-1]
if attrvalue:
attrvalue = self.unescape(attrvalue)
attrs.append((attrname.lower(), attrvalue))
k = m.end()
end = rawdata[k:endpos].strip()
if end not in (">", "/>"):
lineno, offset = self.getpos()
if "\n" in self.__starttag_text:
lineno = lineno + self.__starttag_text.count("\n")
offset = len(self.__starttag_text) \
- self.__starttag_text.rfind("\n")
else:
offset = offset + len(self.__starttag_text)
if self.strict:
self.error("junk characters in start tag: %r"
% (rawdata[k:endpos][:20],))
self.handle_data(rawdata[i:endpos])
return endpos
if end.endswith('/>'):
# XHTML-style empty tag: <span attr="value" />
self.handle_startendtag(tag, attrs)
else:
self.handle_starttag(tag, attrs)
if tag in self.CDATA_CONTENT_ELEMENTS:
self.set_cdata_mode(tag)
return endpos
# Internal -- check to see if we have a complete starttag; return end
# or -1 if incomplete.
def check_for_whole_start_tag(self, i):
rawdata = self.rawdata
if self.strict:
m = locatestarttagend.match(rawdata, i)
else:
m = locatestarttagend_tolerant.match(rawdata, i)
if m:
j = m.end()
next = rawdata[j:j+1]
if next == ">":
return j + 1
if next == "/":
if rawdata.startswith("/>", j):
return j + 2
if rawdata.startswith("/", j):
# buffer boundary
return -1
# else bogus input
if self.strict:
self.updatepos(i, j + 1)
self.error("malformed empty start tag")
if j > i:
return j
else:
return i + 1
if next == "":
# end of input
return -1
if next in ("abcdefghijklmnopqrstuvwxyz=/"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
# end of input in or before attribute value, or we have the
# '/' from a '/>' ending
return -1
if self.strict:
self.updatepos(i, j)
self.error("malformed start tag")
if j > i:
return j
else:
return i + 1
raise AssertionError("we should not get here!")
# Internal -- parse endtag, return end or -1 if incomplete
def parse_endtag(self, i):
rawdata = self.rawdata
assert rawdata[i:i+2] == "</", "unexpected call to parse_endtag"
match = endendtag.search(rawdata, i+1) # >
if not match:
return -1
gtpos = match.end()
match = endtagfind.match(rawdata, i) # </ + tag + >
if not match:
if self.cdata_elem is not None:
self.handle_data(rawdata[i:gtpos])
return gtpos
if self.strict:
self.error("bad end tag: %r" % (rawdata[i:gtpos],))
# find the name: w3.org/TR/html5/tokenization.html#tag-name-state
namematch = tagfind_tolerant.match(rawdata, i+2)
if not namematch:
# w3.org/TR/html5/tokenization.html#end-tag-open-state
if rawdata[i:i+3] == '</>':
return i+3
else:
return self.parse_bogus_comment(i)
tagname = namematch.group().lower()
# consume and ignore other stuff between the name and the >
# Note: this is not 100% correct, since we might have things like
# </tag attr=">">, but looking for > after tha name should cover
# most of the cases and is much simpler
gtpos = rawdata.find('>', namematch.end())
self.handle_endtag(tagname)
return gtpos+1
elem = match.group(1).lower() # script or style
if self.cdata_elem is not None:
if elem != self.cdata_elem:
self.handle_data(rawdata[i:gtpos])
return gtpos
self.handle_endtag(elem.lower())
self.clear_cdata_mode()
return gtpos
# Overridable -- finish processing of start+end tag: <tag.../>
def handle_startendtag(self, tag, attrs):
self.handle_starttag(tag, attrs)
self.handle_endtag(tag)
# Overridable -- handle start tag
def handle_starttag(self, tag, attrs):
pass
# Overridable -- handle end tag
def handle_endtag(self, tag):
pass
# Overridable -- handle character reference
def handle_charref(self, name):
pass
# Overridable -- handle entity reference
def handle_entityref(self, name):
pass
# Overridable -- handle data
def handle_data(self, data):
pass
# Overridable -- handle comment
def handle_comment(self, data):
pass
# Overridable -- handle declaration
def handle_decl(self, decl):
pass
# Overridable -- handle processing instruction
def handle_pi(self, data):
pass
def unknown_decl(self, data):
if self.strict:
self.error("unknown declaration: %r" % (data,))
# Internal -- helper to remove special character quoting
def unescape(self, s):
if '&' not in s:
return s
def replaceEntities(s):
s = s.groups()[0]
try:
if s[0] == "#":
s = s[1:]
if s[0] in ['x','X']:
c = int(s[1:].rstrip(';'), 16)
else:
c = int(s.rstrip(';'))
return chr(c)
except ValueError:
return '&#' + s
else:
from html.entities import html5
if s in html5:
return html5[s]
elif s.endswith(';'):
return '&' + s
for x in range(2, len(s)):
if s[:x] in html5:
return html5[s[:x]] + s[x:]
else:
return '&' + s
return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+;|\w{1,32};?))",
replaceEntities, s, flags=re.ASCII)

1279
upip/http/client.py Normal file

File diff suppressed because it is too large Load Diff

59
upip/inspect.py Normal file
View File

@ -0,0 +1,59 @@
import sys
def getmembers(obj, pred=None):
res = []
for name in dir(obj):
val = getattr(obj, name)
if pred is None or pred(val):
res.append((name, val))
res.sort()
return res
def isfunction(obj):
return isinstance(obj, type(isfunction))
def isgeneratorfunction(obj):
return isinstance(obj, type(lambda:(yield)))
def isgenerator(obj):
return isinstance(obj, type(lambda:(yield)()))
class _Class:
def meth(): pass
_Instance = _Class()
def ismethod(obj):
return isinstance(obj, type(_Instance.meth))
def isclass(obj):
return isinstance(obj, type)
def ismodule(obj):
return isinstance(obj, type(sys))
def getargspec(func):
raise NotImplementedError("This is over-dynamic function, not supported by MicroPython")
def getmodule(obj, _filename=None):
return None # Not known
def getmro(cls):
return [cls]
def getsourcefile(obj):
return None # Not known
def getfile(obj):
return "<unknown>"
def getsource(obj):
return "<source redacted to save you memory>"
def currentframe():
return None
def getframeinfo(frame, context=1):
return ("<unknown>", -1, "<unknown>", [""], 0)

5
upip/io.py Normal file
View File

@ -0,0 +1,5 @@
from uio import *
SEEK_SET = 0
SEEK_CUR = 1
SEEK_END = 2

68
upip/itertools.py Normal file
View File

@ -0,0 +1,68 @@
def count(start=0, step=1):
while True:
yield start
start += step
def cycle(p):
try:
len(p)
except TypeError:
# len() is not defined for this type. Assume it is
# a finite iterable so we must cache the elements.
cache = []
for i in p:
yield i
cache.append(i)
p = cache
while p:
yield from p
def repeat(el, n=None):
if n is None:
while True:
yield el
else:
for i in range(n):
yield el
def chain(*p):
for i in p:
yield from i
def islice(p, start, stop=(), step=1):
if stop == ():
stop = start
start = 0
# TODO: optimizing or breaking semantics?
if start >= stop:
return
it = iter(p)
for i in range(start):
next(it)
while True:
yield next(it)
for i in range(step - 1):
next(it)
start += step
if start >= stop:
return
def tee(iterable, n=2):
return [iter(iterable)] * n
def starmap(function, iterable):
for args in iterable:
yield function(*args)
def accumulate(iterable, func=lambda x, y: x + y):
it = iter(iterable)
try:
acc = next(it)
except StopIteration:
return
yield acc
for element in it:
acc = func(acc, element)
yield acc

332
upip/json/__init__.py Normal file
View File

@ -0,0 +1,332 @@
r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of
JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
interchange format.
:mod:`json` exposes an API familiar to users of the standard library
:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
version of the :mod:`json` library contained in Python 2.6, but maintains
compatibility with Python 2.4 and Python 2.5 and (currently) has
significant performance advantages, even without using the optional C
extension for speedups.
Encoding basic Python object hierarchies::
>>> import json
>>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
'["foo", {"bar": ["baz", null, 1.0, 2]}]'
>>> print(json.dumps("\"foo\bar"))
"\"foo\bar"
>>> print(json.dumps('\u1234'))
"\u1234"
>>> print(json.dumps('\\'))
"\\"
>>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True))
{"a": 0, "b": 0, "c": 0}
>>> from io import StringIO
>>> io = StringIO()
>>> json.dump(['streaming API'], io)
>>> io.getvalue()
'["streaming API"]'
Compact encoding::
>>> import json
>>> from collections import OrderedDict
>>> mydict = OrderedDict([('4', 5), ('6', 7)])
>>> json.dumps([1,2,3,mydict], separators=(',', ':'))
'[1,2,3,{"4":5,"6":7}]'
Pretty printing::
>>> import json
>>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True,
... indent=4, separators=(',', ': ')))
{
"4": 5,
"6": 7
}
Decoding JSON::
>>> import json
>>> obj = ['foo', {'bar': ['baz', None, 1.0, 2]}]
>>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
True
>>> json.loads('"\\"foo\\bar"') == '"foo\x08ar'
True
>>> from io import StringIO
>>> io = StringIO('["streaming API"]')
>>> json.load(io)[0] == 'streaming API'
True
Specializing JSON object decoding::
>>> import json
>>> def as_complex(dct):
... if '__complex__' in dct:
... return complex(dct['real'], dct['imag'])
... return dct
...
>>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
... object_hook=as_complex)
(1+2j)
>>> from decimal import Decimal
>>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
True
Specializing JSON object encoding::
>>> import json
>>> def encode_complex(obj):
... if isinstance(obj, complex):
... return [obj.real, obj.imag]
... raise TypeError(repr(o) + " is not JSON serializable")
...
>>> json.dumps(2 + 1j, default=encode_complex)
'[2.0, 1.0]'
>>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
'[2.0, 1.0]'
>>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
'[2.0, 1.0]'
Using json.tool from the shell to validate and pretty-print::
$ echo '{"json":"obj"}' | python -m json.tool
{
"json": "obj"
}
$ echo '{ 1.2:3.4}' | python -m json.tool
Expecting property name enclosed in double quotes: line 1 column 3 (char 2)
"""
__version__ = '2.0.9'
__all__ = [
'dump', 'dumps', 'load', 'loads',
'JSONDecoder', 'JSONEncoder',
]
__author__ = 'Bob Ippolito <bob@redivi.com>'
from .decoder import JSONDecoder
from .encoder import JSONEncoder
_default_encoder = JSONEncoder(
skipkeys=False,
ensure_ascii=True,
check_circular=True,
allow_nan=True,
indent=None,
separators=None,
default=None,
)
def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None,
default=None, sort_keys=False, **kw):
"""Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
``.write()``-supporting file-like object).
If ``skipkeys`` is true then ``dict`` keys that are not basic types
(``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped
instead of raising a ``TypeError``.
If ``ensure_ascii`` is false, then the strings written to ``fp`` can
contain non-ASCII characters if they appear in strings contained in
``obj``. Otherwise, all such characters are escaped in JSON strings.
If ``check_circular`` is false, then the circular reference check
for container types will be skipped and a circular reference will
result in an ``OverflowError`` (or worse).
If ``allow_nan`` is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
in strict compliance of the JSON specification, instead of using the
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
If ``indent`` is a non-negative integer, then JSON array elements and
object members will be pretty-printed with that indent level. An indent
level of 0 will only insert newlines. ``None`` is the most compact
representation. Since the default item separator is ``', '``, the
output might include trailing whitespace when ``indent`` is specified.
You can use ``separators=(',', ': ')`` to avoid this.
If ``separators`` is an ``(item_separator, dict_separator)`` tuple
then it will be used instead of the default ``(', ', ': ')`` separators.
``(',', ':')`` is the most compact JSON representation.
``default(obj)`` is a function that should return a serializable version
of obj or raise TypeError. The default simply raises TypeError.
If *sort_keys* is ``True`` (default: ``False``), then the output of
dictionaries will be sorted by key.
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
``.default()`` method to serialize additional types), specify it with
the ``cls`` kwarg; otherwise ``JSONEncoder`` is used.
"""
# cached encoder
if (not skipkeys and ensure_ascii and
check_circular and allow_nan and
cls is None and indent is None and separators is None and
default is None and not sort_keys and not kw):
iterable = _default_encoder.iterencode(obj)
else:
if cls is None:
cls = JSONEncoder
iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
check_circular=check_circular, allow_nan=allow_nan, indent=indent,
separators=separators,
default=default, sort_keys=sort_keys, **kw).iterencode(obj)
# could accelerate with writelines in some versions of Python, at
# a debuggability cost
for chunk in iterable:
fp.write(chunk)
def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None,
default=None, sort_keys=False, **kw):
"""Serialize ``obj`` to a JSON formatted ``str``.
If ``skipkeys`` is false then ``dict`` keys that are not basic types
(``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped
instead of raising a ``TypeError``.
If ``ensure_ascii`` is false, then the return value can contain non-ASCII
characters if they appear in strings contained in ``obj``. Otherwise, all
such characters are escaped in JSON strings.
If ``check_circular`` is false, then the circular reference check
for container types will be skipped and a circular reference will
result in an ``OverflowError`` (or worse).
If ``allow_nan`` is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
strict compliance of the JSON specification, instead of using the
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
If ``indent`` is a non-negative integer, then JSON array elements and
object members will be pretty-printed with that indent level. An indent
level of 0 will only insert newlines. ``None`` is the most compact
representation. Since the default item separator is ``', '``, the
output might include trailing whitespace when ``indent`` is specified.
You can use ``separators=(',', ': ')`` to avoid this.
If ``separators`` is an ``(item_separator, dict_separator)`` tuple
then it will be used instead of the default ``(', ', ': ')`` separators.
``(',', ':')`` is the most compact JSON representation.
``default(obj)`` is a function that should return a serializable version
of obj or raise TypeError. The default simply raises TypeError.
If *sort_keys* is ``True`` (default: ``False``), then the output of
dictionaries will be sorted by key.
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
``.default()`` method to serialize additional types), specify it with
the ``cls`` kwarg; otherwise ``JSONEncoder`` is used.
"""
# cached encoder
if (not skipkeys and ensure_ascii and
check_circular and allow_nan and
cls is None and indent is None and separators is None and
default is None and not sort_keys and not kw):
return _default_encoder.encode(obj)
if cls is None:
cls = JSONEncoder
return cls(
skipkeys=skipkeys, ensure_ascii=ensure_ascii,
check_circular=check_circular, allow_nan=allow_nan, indent=indent,
separators=separators, default=default, sort_keys=sort_keys,
**kw).encode(obj)
_default_decoder = JSONDecoder(object_hook=None, object_pairs_hook=None)
def load(fp, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
a JSON document) to a Python object.
``object_hook`` is an optional function that will be called with the
result of any object literal decode (a ``dict``). The return value of
``object_hook`` will be used instead of the ``dict``. This feature
can be used to implement custom decoders (e.g. JSON-RPC class hinting).
``object_pairs_hook`` is an optional function that will be called with the
result of any object literal decoded with an ordered list of pairs. The
return value of ``object_pairs_hook`` will be used instead of the ``dict``.
This feature can be used to implement custom decoders that rely on the
order that the key and value pairs are decoded (for example,
collections.OrderedDict will remember the order of insertion). If
``object_hook`` is also defined, the ``object_pairs_hook`` takes priority.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg; otherwise ``JSONDecoder`` is used.
"""
return loads(fp.read(),
cls=cls, object_hook=object_hook,
parse_float=parse_float, parse_int=parse_int,
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw)
def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
"""Deserialize ``s`` (a ``str`` instance containing a JSON
document) to a Python object.
``object_hook`` is an optional function that will be called with the
result of any object literal decode (a ``dict``). The return value of
``object_hook`` will be used instead of the ``dict``. This feature
can be used to implement custom decoders (e.g. JSON-RPC class hinting).
``object_pairs_hook`` is an optional function that will be called with the
result of any object literal decoded with an ordered list of pairs. The
return value of ``object_pairs_hook`` will be used instead of the ``dict``.
This feature can be used to implement custom decoders that rely on the
order that the key and value pairs are decoded (for example,
collections.OrderedDict will remember the order of insertion). If
``object_hook`` is also defined, the ``object_pairs_hook`` takes priority.
``parse_float``, if specified, will be called with the string
of every JSON float to be decoded. By default this is equivalent to
float(num_str). This can be used to use another datatype or parser
for JSON floats (e.g. decimal.Decimal).
``parse_int``, if specified, will be called with the string
of every JSON int to be decoded. By default this is equivalent to
int(num_str). This can be used to use another datatype or parser
for JSON integers (e.g. float).
``parse_constant``, if specified, will be called with one of the
following strings: -Infinity, Infinity, NaN, null, true, false.
This can be used to raise an exception if invalid JSON numbers
are encountered.
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
kwarg; otherwise ``JSONDecoder`` is used.
The ``encoding`` argument is ignored and deprecated.
"""
if (cls is None and object_hook is None and
parse_int is None and parse_float is None and
parse_constant is None and object_pairs_hook is None and not kw):
return _default_decoder.decode(s)
if cls is None:
cls = JSONDecoder
if object_hook is not None:
kw['object_hook'] = object_hook
if object_pairs_hook is not None:
kw['object_pairs_hook'] = object_pairs_hook
if parse_float is not None:
kw['parse_float'] = parse_float
if parse_int is not None:
kw['parse_int'] = parse_int
if parse_constant is not None:
kw['parse_constant'] = parse_constant
return cls(**kw).decode(s)

362
upip/json/decoder.py Normal file
View File

@ -0,0 +1,362 @@
"""Implementation of JSONDecoder
"""
import re
import sys
from json import scanner
try:
from _json import scanstring as c_scanstring
except ImportError:
c_scanstring = None
__all__ = ['JSONDecoder']
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
NaN, PosInf, NegInf = float('nan'), float('inf'), float('-inf')
def linecol(doc, pos):
if isinstance(doc, bytes):
newline = b'\n'
else:
newline = '\n'
lineno = doc.count(newline, 0, pos) + 1
if lineno == 1:
colno = pos + 1
else:
colno = pos - doc.rindex(newline, 0, pos)
return lineno, colno
def errmsg(msg, doc, pos, end=None):
# Note that this function is called from _json
lineno, colno = linecol(doc, pos)
if end is None:
fmt = '{0}: line {1} column {2} (char {3})'
return fmt.format(msg, lineno, colno, pos)
#fmt = '%s: line %d column %d (char %d)'
#return fmt % (msg, lineno, colno, pos)
endlineno, endcolno = linecol(doc, end)
fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
#fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
#return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
_CONSTANTS = {
'-Infinity': NegInf,
'Infinity': PosInf,
'NaN': NaN,
}
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
BACKSLASH = {
'"': '"', '\\': '\\', '/': '/',
'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
}
def py_scanstring(s, end, strict=True,
_b=BACKSLASH, _m=STRINGCHUNK.match):
"""Scan the string s for a JSON string. End is the index of the
character in s after the quote that started the JSON string.
Unescapes all valid JSON string escape sequences and raises ValueError
on attempt to decode an invalid string. If strict is False then literal
control characters are allowed in the string.
Returns a tuple of the decoded string and the index of the character in s
after the end quote."""
chunks = []
_append = chunks.append
begin = end - 1
while 1:
chunk = _m(s, end)
if chunk is None:
raise ValueError(
errmsg("Unterminated string starting at", s, begin))
end = chunk.end()
content, terminator = chunk.groups()
# Content is contains zero or more unescaped string characters
if content:
_append(content)
# Terminator is the end of string, a literal control character,
# or a backslash denoting that an escape sequence follows
if terminator == '"':
break
elif terminator != '\\':
if strict:
#msg = "Invalid control character %r at" % (terminator,)
msg = "Invalid control character {0!r} at".format(terminator)
raise ValueError(errmsg(msg, s, end))
else:
_append(terminator)
continue
try:
esc = s[end]
except IndexError:
raise ValueError(
errmsg("Unterminated string starting at", s, begin))
# If not a unicode escape sequence, must be in the lookup table
if esc != 'u':
try:
char = _b[esc]
except KeyError:
msg = "Invalid \\escape: {0!r}".format(esc)
raise ValueError(errmsg(msg, s, end))
end += 1
else:
esc = s[end + 1:end + 5]
next_end = end + 5
if len(esc) != 4:
msg = "Invalid \\uXXXX escape"
raise ValueError(errmsg(msg, s, end))
uni = int(esc, 16)
if 0xd800 <= uni <= 0xdbff:
msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
if not s[end + 5:end + 7] == '\\u':
raise ValueError(errmsg(msg, s, end))
esc2 = s[end + 7:end + 11]
if len(esc2) != 4:
raise ValueError(errmsg(msg, s, end))
uni2 = int(esc2, 16)
uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
next_end += 6
char = chr(uni)
end = next_end
_append(char)
return ''.join(chunks), end
# Use speedup if available
scanstring = c_scanstring or py_scanstring
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'
def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
s, end = s_and_end
pairs = []
pairs_append = pairs.append
# Backwards compatibility
if memo is None:
memo = {}
memo_get = memo.setdefault
# Use a slice to prevent IndexError from being raised, the following
# check will raise a more specific ValueError if the string is empty
nextchar = s[end:end + 1]
# Normally we expect nextchar == '"'
if nextchar != '"':
if nextchar in _ws:
end = _w(s, end).end()
nextchar = s[end:end + 1]
# Trivial empty object
if nextchar == '}':
if object_pairs_hook is not None:
result = object_pairs_hook(pairs)
return result, end + 1
pairs = {}
if object_hook is not None:
pairs = object_hook(pairs)
return pairs, end + 1
elif nextchar != '"':
raise ValueError(errmsg(
"Expecting property name enclosed in double quotes", s, end))
end += 1
while True:
key, end = scanstring(s, end, strict)
key = memo_get(key, key)
# To skip some function call overhead we optimize the fast paths where
# the JSON key separator is ": " or just ":".
if s[end:end + 1] != ':':
end = _w(s, end).end()
if s[end:end + 1] != ':':
raise ValueError(errmsg("Expecting ':' delimiter", s, end))
end += 1
try:
if s[end] in _ws:
end += 1
if s[end] in _ws:
end = _w(s, end + 1).end()
except IndexError:
pass
try:
value, end = scan_once(s, end)
except StopIteration:
raise ValueError(errmsg("Expecting object", s, end))
pairs_append((key, value))
try:
nextchar = s[end]
if nextchar in _ws:
end = _w(s, end + 1).end()
nextchar = s[end]
except IndexError:
nextchar = ''
end += 1
if nextchar == '}':
break
elif nextchar != ',':
raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
end = _w(s, end).end()
nextchar = s[end:end + 1]
end += 1
if nextchar != '"':
raise ValueError(errmsg(
"Expecting property name enclosed in double quotes", s, end - 1))
if object_pairs_hook is not None:
result = object_pairs_hook(pairs)
return result, end
pairs = dict(pairs)
if object_hook is not None:
pairs = object_hook(pairs)
return pairs, end
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
s, end = s_and_end
values = []
nextchar = s[end:end + 1]
if nextchar in _ws:
end = _w(s, end + 1).end()
nextchar = s[end:end + 1]
# Look-ahead for trivial empty array
if nextchar == ']':
return values, end + 1
_append = values.append
while True:
try:
value, end = scan_once(s, end)
except StopIteration:
raise ValueError(errmsg("Expecting object", s, end))
_append(value)
nextchar = s[end:end + 1]
if nextchar in _ws:
end = _w(s, end + 1).end()
nextchar = s[end:end + 1]
end += 1
if nextchar == ']':
break
elif nextchar != ',':
raise ValueError(errmsg("Expecting ',' delimiter", s, end))
try:
if s[end] in _ws:
end += 1
if s[end] in _ws:
end = _w(s, end + 1).end()
except IndexError:
pass
return values, end
class JSONDecoder(object):
"""Simple JSON <http://json.org> decoder
Performs the following translations in decoding by default:
+---------------+-------------------+
| JSON | Python |
+===============+===================+
| object | dict |
+---------------+-------------------+
| array | list |
+---------------+-------------------+
| string | str |
+---------------+-------------------+
| number (int) | int |
+---------------+-------------------+
| number (real) | float |
+---------------+-------------------+
| true | True |
+---------------+-------------------+
| false | False |
+---------------+-------------------+
| null | None |
+---------------+-------------------+
It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
their corresponding ``float`` values, which is outside the JSON spec.
"""
def __init__(self, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, strict=True,
object_pairs_hook=None):
"""``object_hook``, if specified, will be called with the result
of every JSON object decoded and its return value will be used in
place of the given ``dict``. This can be used to provide custom
deserializations (e.g. to support JSON-RPC class hinting).
``object_pairs_hook``, if specified will be called with the result of
every JSON object decoded with an ordered list of pairs. The return
value of ``object_pairs_hook`` will be used instead of the ``dict``.
This feature can be used to implement custom decoders that rely on the
order that the key and value pairs are decoded (for example,
collections.OrderedDict will remember the order of insertion). If
``object_hook`` is also defined, the ``object_pairs_hook`` takes
priority.
``parse_float``, if specified, will be called with the string
of every JSON float to be decoded. By default this is equivalent to
float(num_str). This can be used to use another datatype or parser
for JSON floats (e.g. decimal.Decimal).
``parse_int``, if specified, will be called with the string
of every JSON int to be decoded. By default this is equivalent to
int(num_str). This can be used to use another datatype or parser
for JSON integers (e.g. float).
``parse_constant``, if specified, will be called with one of the
following strings: -Infinity, Infinity, NaN.
This can be used to raise an exception if invalid JSON numbers
are encountered.
If ``strict`` is false (true is the default), then control
characters will be allowed inside strings. Control characters in
this context are those with character codes in the 0-31 range,
including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.
"""
self.object_hook = object_hook
self.parse_float = parse_float or float
self.parse_int = parse_int or int
self.parse_constant = parse_constant or _CONSTANTS.__getitem__
self.strict = strict
self.object_pairs_hook = object_pairs_hook
self.parse_object = JSONObject
self.parse_array = JSONArray
self.parse_string = scanstring
self.memo = {}
self.scan_once = scanner.make_scanner(self)
def decode(self, s, _w=WHITESPACE.match):
"""Return the Python representation of ``s`` (a ``str`` instance
containing a JSON document).
"""
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
end = _w(s, end).end()
if end != len(s):
raise ValueError(errmsg("Extra data", s, end, len(s)))
return obj
def raw_decode(self, s, idx=0):
"""Decode a JSON document from ``s`` (a ``str`` beginning with
a JSON document) and return a 2-tuple of the Python
representation and the index in ``s`` where the document ended.
This can be used to decode a JSON document from a string that may
have extraneous data at the end.
"""
try:
obj, end = self.scan_once(s, idx)
except StopIteration:
raise ValueError("No JSON object could be decoded")
return obj, end

427
upip/json/encoder.py Normal file
View File

@ -0,0 +1,427 @@
"""Implementation of JSONEncoder
"""
import re
try:
from _json import encode_basestring_ascii as c_encode_basestring_ascii
except ImportError:
c_encode_basestring_ascii = None
try:
from _json import make_encoder as c_make_encoder
except ImportError:
c_make_encoder = None
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
HAS_UTF8 = re.compile(b'[\x80-\xff]')
ESCAPE_DCT = {
'\\': '\\\\',
'"': '\\"',
'\b': '\\b',
'\f': '\\f',
'\n': '\\n',
'\r': '\\r',
'\t': '\\t',
}
for i in range(0x20):
ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
#ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
INFINITY = float('inf')
FLOAT_REPR = repr
def encode_basestring(s):
"""Return a JSON representation of a Python string
"""
def replace(match):
return ESCAPE_DCT[match.group(0)]
return '"' + ESCAPE.sub(replace, s) + '"'
def py_encode_basestring_ascii(s):
"""Return an ASCII-only JSON representation of a Python string
"""
def replace(match):
s = match.group(0)
try:
return ESCAPE_DCT[s]
except KeyError:
n = ord(s)
if n < 0x10000:
return '\\u{0:04x}'.format(n)
#return '\\u%04x' % (n,)
else:
# surrogate pair
n -= 0x10000
s1 = 0xd800 | ((n >> 10) & 0x3ff)
s2 = 0xdc00 | (n & 0x3ff)
return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
return '"' + ESCAPE_ASCII.sub(replace, s) + '"'
encode_basestring_ascii = (
c_encode_basestring_ascii or py_encode_basestring_ascii)
class JSONEncoder(object):
"""Extensible JSON <http://json.org> encoder for Python data structures.
Supports the following objects and types by default:
+-------------------+---------------+
| Python | JSON |
+===================+===============+
| dict | object |
+-------------------+---------------+
| list, tuple | array |
+-------------------+---------------+
| str | string |
+-------------------+---------------+
| int, float | number |
+-------------------+---------------+
| True | true |
+-------------------+---------------+
| False | false |
+-------------------+---------------+
| None | null |
+-------------------+---------------+
To extend this to recognize other objects, subclass and implement a
``.default()`` method with another method that returns a serializable
object for ``o`` if possible, otherwise it should call the superclass
implementation (to raise ``TypeError``).
"""
item_separator = ', '
key_separator = ': '
def __init__(self, skipkeys=False, ensure_ascii=True,
check_circular=True, allow_nan=True, sort_keys=False,
indent=None, separators=None, default=None):
"""Constructor for JSONEncoder, with sensible defaults.
If skipkeys is false, then it is a TypeError to attempt
encoding of keys that are not str, int, float or None. If
skipkeys is True, such items are simply skipped.
If ensure_ascii is true, the output is guaranteed to be str
objects with all incoming non-ASCII characters escaped. If
ensure_ascii is false, the output can contain non-ASCII characters.
If check_circular is true, then lists, dicts, and custom encoded
objects will be checked for circular references during encoding to
prevent an infinite recursion (which would cause an OverflowError).
Otherwise, no such check takes place.
If allow_nan is true, then NaN, Infinity, and -Infinity will be
encoded as such. This behavior is not JSON specification compliant,
but is consistent with most JavaScript based encoders and decoders.
Otherwise, it will be a ValueError to encode such floats.
If sort_keys is true, then the output of dictionaries will be
sorted by key; this is useful for regression tests to ensure
that JSON serializations can be compared on a day-to-day basis.
If indent is a non-negative integer, then JSON array
elements and object members will be pretty-printed with that
indent level. An indent level of 0 will only insert newlines.
None is the most compact representation. Since the default
item separator is ', ', the output might include trailing
whitespace when indent is specified. You can use
separators=(',', ': ') to avoid this.
If specified, separators should be a (item_separator, key_separator)
tuple. The default is (', ', ': '). To get the most compact JSON
representation you should specify (',', ':') to eliminate whitespace.
If specified, default is a function that gets called for objects
that can't otherwise be serialized. It should return a JSON encodable
version of the object or raise a ``TypeError``.
"""
self.skipkeys = skipkeys
self.ensure_ascii = ensure_ascii
self.check_circular = check_circular
self.allow_nan = allow_nan
self.sort_keys = sort_keys
self.indent = indent
if separators is not None:
self.item_separator, self.key_separator = separators
if default is not None:
self.default = default
def default(self, o):
"""Implement this method in a subclass such that it returns
a serializable object for ``o``, or calls the base implementation
(to raise a ``TypeError``).
For example, to support arbitrary iterators, you could
implement default like this::
def default(self, o):
try:
iterable = iter(o)
except TypeError:
pass
else:
return list(iterable)
# Let the base class default method raise the TypeError
return JSONEncoder.default(self, o)
"""
raise TypeError(repr(o) + " is not JSON serializable")
def encode(self, o):
"""Return a JSON string representation of a Python data structure.
>>> JSONEncoder().encode({"foo": ["bar", "baz"]})
'{"foo": ["bar", "baz"]}'
"""
# This is for extremely simple cases and benchmarks.
if isinstance(o, str):
if self.ensure_ascii:
return encode_basestring_ascii(o)
else:
return encode_basestring(o)
# This doesn't pass the iterator directly to ''.join() because the
# exceptions aren't as detailed. The list call should be roughly
# equivalent to the PySequence_Fast that ''.join() would do.
chunks = self.iterencode(o, _one_shot=True)
if not isinstance(chunks, (list, tuple)):
chunks = list(chunks)
return ''.join(chunks)
def iterencode(self, o, _one_shot=False):
"""Encode the given object and yield each string
representation as available.
For example::
for chunk in JSONEncoder().iterencode(bigobject):
mysocket.write(chunk)
"""
if self.check_circular:
markers = {}
else:
markers = None
if self.ensure_ascii:
_encoder = encode_basestring_ascii
else:
_encoder = encode_basestring
def floatstr(o, allow_nan=self.allow_nan,
_repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
# Check for specials. Note that this type of test is processor
# and/or platform-specific, so do tests which don't depend on the
# internals.
if o != o:
text = 'NaN'
elif o == _inf:
text = 'Infinity'
elif o == _neginf:
text = '-Infinity'
else:
return _repr(o)
if not allow_nan:
raise ValueError(
"Out of range float values are not JSON compliant: " +
repr(o))
return text
if (_one_shot and c_make_encoder is not None
and self.indent is None):
_iterencode = c_make_encoder(
markers, self.default, _encoder, self.indent,
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, self.allow_nan)
else:
_iterencode = _make_iterencode(
markers, self.default, _encoder, self.indent, floatstr,
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, _one_shot)
return _iterencode(o, 0)
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
## HACK: hand-optimized bytecode; turn globals into locals
ValueError=ValueError,
dict=dict,
float=float,
id=id,
int=int,
isinstance=isinstance,
list=list,
str=str,
tuple=tuple,
):
if _indent is not None and not isinstance(_indent, str):
_indent = ' ' * _indent
def _iterencode_list(lst, _current_indent_level):
if not lst:
yield '[]'
return
if markers is not None:
markerid = id(lst)
if markerid in markers:
raise ValueError("Circular reference detected")
markers[markerid] = lst
buf = '['
if _indent is not None:
_current_indent_level += 1
newline_indent = '\n' + _indent * _current_indent_level
separator = _item_separator + newline_indent
buf += newline_indent
else:
newline_indent = None
separator = _item_separator
first = True
for value in lst:
if first:
first = False
else:
buf = separator
if isinstance(value, str):
yield buf + _encoder(value)
elif value is None:
yield buf + 'null'
elif value is True:
yield buf + 'true'
elif value is False:
yield buf + 'false'
elif isinstance(value, int):
yield buf + str(value)
elif isinstance(value, float):
yield buf + _floatstr(value)
else:
yield buf
if isinstance(value, (list, tuple)):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
for chunk in chunks:
yield chunk
if newline_indent is not None:
_current_indent_level -= 1
yield '\n' + _indent * _current_indent_level
yield ']'
if markers is not None:
del markers[markerid]
def _iterencode_dict(dct, _current_indent_level):
if not dct:
yield '{}'
return
if markers is not None:
markerid = id(dct)
if markerid in markers:
raise ValueError("Circular reference detected")
markers[markerid] = dct
yield '{'
if _indent is not None:
_current_indent_level += 1
newline_indent = '\n' + _indent * _current_indent_level
item_separator = _item_separator + newline_indent
yield newline_indent
else:
newline_indent = None
item_separator = _item_separator
first = True
if _sort_keys:
items = sorted(dct.items(), key=lambda kv: kv[0])
else:
items = dct.items()
for key, value in items:
if isinstance(key, str):
pass
# JavaScript is weakly typed for these, so it makes sense to
# also allow them. Many encoders seem to do something like this.
elif isinstance(key, float):
key = _floatstr(key)
elif key is True:
key = 'true'
elif key is False:
key = 'false'
elif key is None:
key = 'null'
elif isinstance(key, int):
key = str(key)
elif _skipkeys:
continue
else:
raise TypeError("key " + repr(key) + " is not a string")
if first:
first = False
else:
yield item_separator
yield _encoder(key)
yield _key_separator
if isinstance(value, str):
yield _encoder(value)
elif value is None:
yield 'null'
elif value is True:
yield 'true'
elif value is False:
yield 'false'
elif isinstance(value, int):
yield str(value)
elif isinstance(value, float):
yield _floatstr(value)
else:
if isinstance(value, (list, tuple)):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
for chunk in chunks:
yield chunk
if newline_indent is not None:
_current_indent_level -= 1
yield '\n' + _indent * _current_indent_level
yield '}'
if markers is not None:
del markers[markerid]
def _iterencode(o, _current_indent_level):
if isinstance(o, str):
yield _encoder(o)
elif o is None:
yield 'null'
elif o is True:
yield 'true'
elif o is False:
yield 'false'
elif isinstance(o, int):
yield str(o)
elif isinstance(o, float):
yield _floatstr(o)
elif isinstance(o, (list, tuple)):
for chunk in _iterencode_list(o, _current_indent_level):
yield chunk
elif isinstance(o, dict):
for chunk in _iterencode_dict(o, _current_indent_level):
yield chunk
else:
if markers is not None:
markerid = id(o)
if markerid in markers:
raise ValueError("Circular reference detected")
markers[markerid] = o
o = _default(o)
for chunk in _iterencode(o, _current_indent_level):
yield chunk
if markers is not None:
del markers[markerid]
return _iterencode

73
upip/json/scanner.py Normal file
View File

@ -0,0 +1,73 @@
"""JSON token scanner
"""
import re
try:
from _json import make_scanner as c_make_scanner
except ImportError:
c_make_scanner = None
__all__ = ['make_scanner']
NUMBER_RE = re.compile(
r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
(re.VERBOSE | re.MULTILINE | re.DOTALL))
def py_make_scanner(context):
parse_object = context.parse_object
parse_array = context.parse_array
parse_string = context.parse_string
match_number = NUMBER_RE.match
strict = context.strict
parse_float = context.parse_float
parse_int = context.parse_int
parse_constant = context.parse_constant
object_hook = context.object_hook
object_pairs_hook = context.object_pairs_hook
memo = context.memo
def _scan_once(string, idx):
try:
nextchar = string[idx]
except IndexError:
raise StopIteration
if nextchar == '"':
return parse_string(string, idx + 1, strict)
elif nextchar == '{':
return parse_object((string, idx + 1), strict,
_scan_once, object_hook, object_pairs_hook, memo)
elif nextchar == '[':
return parse_array((string, idx + 1), _scan_once)
elif nextchar == 'n' and string[idx:idx + 4] == 'null':
return None, idx + 4
elif nextchar == 't' and string[idx:idx + 4] == 'true':
return True, idx + 4
elif nextchar == 'f' and string[idx:idx + 5] == 'false':
return False, idx + 5
m = match_number(string, idx)
if m is not None:
integer, frac, exp = m.groups()
if frac or exp:
res = parse_float(integer + (frac or '') + (exp or ''))
else:
res = parse_int(integer)
return res, m.end()
elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
return parse_constant('NaN'), idx + 3
elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
return parse_constant('Infinity'), idx + 8
elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
return parse_constant('-Infinity'), idx + 9
else:
raise StopIteration
def scan_once(string, idx):
try:
return _scan_once(string, idx)
finally:
memo.clear()
return _scan_once
make_scanner = c_make_scanner or py_make_scanner

40
upip/json/tool.py Normal file
View File

@ -0,0 +1,40 @@
r"""Command-line tool to validate and pretty-print JSON
Usage::
$ echo '{"json":"obj"}' | python -m json.tool
{
"json": "obj"
}
$ echo '{ 1.2:3.4}' | python -m json.tool
Expecting property name enclosed in double quotes: line 1 column 3 (char 2)
"""
import sys
import json
def main():
if len(sys.argv) == 1:
infile = sys.stdin
outfile = sys.stdout
elif len(sys.argv) == 2:
infile = open(sys.argv[1], 'r')
outfile = sys.stdout
elif len(sys.argv) == 3:
infile = open(sys.argv[1], 'r')
outfile = open(sys.argv[2], 'w')
else:
raise SystemExit(sys.argv[0] + " [infile [outfile]]")
with infile:
try:
obj = json.load(infile)
except ValueError as e:
raise SystemExit(e)
with outfile:
json.dump(obj, outfile, sort_keys=True,
indent=4, separators=(',', ': '))
outfile.write('\n')
if __name__ == '__main__':
main()

94
upip/keyword.py Normal file
View File

@ -0,0 +1,94 @@
#! /usr/bin/env python3
"""Keywords (from "graminit.c")
This file is automatically generated; please don't muck it up!
To update the symbols in this file, 'cd' to the top directory of
the python source tree after building the interpreter and run:
./python Lib/keyword.py
"""
__all__ = ["iskeyword", "kwlist"]
kwlist = [
#--start keywords--
'False',
'None',
'True',
'and',
'as',
'assert',
'break',
'class',
'continue',
'def',
'del',
'elif',
'else',
'except',
'finally',
'for',
'from',
'global',
'if',
'import',
'in',
'is',
'lambda',
'nonlocal',
'not',
'or',
'pass',
'raise',
'return',
'try',
'while',
'with',
'yield',
#--end keywords--
]
frozenset = set
iskeyword = frozenset(kwlist).__contains__
def main():
import sys, re
args = sys.argv[1:]
iptfile = args and args[0] or "Python/graminit.c"
if len(args) > 1: optfile = args[1]
else: optfile = "Lib/keyword.py"
# scan the source file for keywords
with open(iptfile) as fp:
strprog = re.compile('"([^"]+)"')
lines = []
for line in fp:
if '{1, "' in line:
match = strprog.search(line)
if match:
lines.append(" '" + match.group(1) + "',\n")
lines.sort()
# load the output skeleton from the target
with open(optfile) as fp:
format = fp.readlines()
# insert the lines of keywords
try:
start = format.index("#--start keywords--\n") + 1
end = format.index("#--end keywords--\n")
format[start:end] = lines
except ValueError:
sys.stderr.write("target does not contain format markers\n")
sys.exit(1)
# write the output file
fp = open(optfile, 'w')
fp.write(''.join(format))
fp.close()
if __name__ == "__main__":
main()

1
upip/linecache.py Normal file
View File

@ -0,0 +1 @@
cache = {}

2
upip/locale.py Normal file
View File

@ -0,0 +1,2 @@
def getpreferredencoding():
return "utf-8"

94
upip/logging.py Normal file
View File

@ -0,0 +1,94 @@
import sys
CRITICAL = 50
ERROR = 40
WARNING = 30
INFO = 20
DEBUG = 10
NOTSET = 0
_level_dict = {
CRITICAL: "CRIT",
ERROR: "ERROR",
WARNING: "WARN",
INFO: "INFO",
DEBUG: "DEBUG",
}
_stream = sys.stderr
class Logger:
level = NOTSET
def __init__(self, name):
self.name = name
def _level_str(self, level):
l = _level_dict.get(level)
if l is not None:
return l
return "LVL%s" % level
def setLevel(self, level):
self.level = level
def isEnabledFor(self, level):
return level >= (self.level or _level)
def log(self, level, msg, *args):
if level >= (self.level or _level):
_stream.write("%s:%s:" % (self._level_str(level), self.name))
if not args:
print(msg, file=_stream)
else:
print(msg % args, file=_stream)
def debug(self, msg, *args):
self.log(DEBUG, msg, *args)
def info(self, msg, *args):
self.log(INFO, msg, *args)
def warning(self, msg, *args):
self.log(WARNING, msg, *args)
def error(self, msg, *args):
self.log(ERROR, msg, *args)
def critical(self, msg, *args):
self.log(CRITICAL, msg, *args)
def exc(self, e, msg, *args):
self.log(ERROR, msg, *args)
sys.print_exception(e, _stream)
def exception(self, msg, *args):
self.exc(sys.exc_info()[1], msg, *args)
_level = INFO
_loggers = {}
def getLogger(name):
if name in _loggers:
return _loggers[name]
l = Logger(name)
_loggers[name] = l
return l
def info(msg, *args):
getLogger(None).info(msg, *args)
def debug(msg, *args):
getLogger(None).debug(msg, *args)
def basicConfig(level=INFO, filename=None, stream=None, format=None):
global _level, _stream
_level = level
if stream:
_stream = stream
if filename is not None:
print("logging.basicConfig: filename arg is not supported")
if format is not None:
print("logging.basicConfig: format arg is not supported")

6
upip/machine/__init__.py Normal file
View File

@ -0,0 +1,6 @@
from umachine import *
from .timer import *
from .pin import *
def unique_id():
return b"upy-non-unique"

28
upip/machine/pin.py Normal file
View File

@ -0,0 +1,28 @@
import umachine
class Pin(umachine.PinBase):
IN = "in"
OUT = "out"
def __init__(self, no, dir=IN):
pref = "/sys/class/gpio/gpio{}/".format(no)
dirf = pref + "direction"
try:
f = open(dirf, "w")
except OSError:
with open("/sys/class/gpio/export", "w") as f:
f.write(str(no))
f = open(dirf, "w")
f.write(dir)
f.close()
self.f = open(pref + "value", "r+b")
def value(self, v=None):
if v is None:
self.f.seek(0)
return 1 if self.f.read(1) == b"1" else 0
self.f.write(b"1" if v else b"0")
def deinit(self):
self.f.close()

89
upip/machine/timer.py Normal file
View File

@ -0,0 +1,89 @@
import ffilib
import uctypes
import array
import uos
import os
import utime
from signal import *
libc = ffilib.libc()
librt = ffilib.open("librt")
CLOCK_REALTIME = 0
CLOCK_MONOTONIC = 1
SIGEV_SIGNAL = 0
sigval_t = {
"sival_int": uctypes.INT32 | 0,
"sival_ptr": (uctypes.PTR | 0, uctypes.UINT8),
}
sigevent_t = {
"sigev_value": (0, sigval_t),
"sigev_signo": uctypes.INT32 | 8,
"sigev_notify": uctypes.INT32 | 12,
}
timespec_t = {
"tv_sec": uctypes.INT32 | 0,
"tv_nsec": uctypes.INT64 | 8,
}
itimerspec_t = {
"it_interval": (0, timespec_t),
"it_value": (16, timespec_t),
}
__libc_current_sigrtmin = libc.func("i", "__libc_current_sigrtmin", "")
SIGRTMIN = __libc_current_sigrtmin()
timer_create_ = librt.func("i", "timer_create", "ipp")
timer_settime_ = librt.func("i", "timer_settime", "PiPp")
def new(sdesc):
buf = bytearray(uctypes.sizeof(sdesc))
s = uctypes.struct(uctypes.addressof(buf), sdesc, uctypes.NATIVE)
return s
def timer_create(sig_id):
sev = new(sigevent_t)
#print(sev)
sev.sigev_notify = SIGEV_SIGNAL
sev.sigev_signo = SIGRTMIN + sig_id
timerid = array.array('P', [0])
r = timer_create_(CLOCK_MONOTONIC, sev, timerid)
os.check_error(r)
#print("timerid", hex(timerid[0]))
return timerid[0]
def timer_settime(tid, hz):
period = 1000000000 // hz
new_val = new(itimerspec_t)
new_val.it_value.tv_nsec = period
new_val.it_interval.tv_nsec = period
#print("new_val:", bytes(new_val))
old_val = new(itimerspec_t)
#print(new_val, old_val)
r = timer_settime_(tid, 0, new_val, old_val)
os.check_error(r)
#print("old_val:", bytes(old_val))
#print("timer_settime", r)
class Timer:
def __init__(self, id, freq):
self.id = id
self.tid = timer_create(id)
self.freq = freq
def callback(self, cb):
self.cb = cb
timer_settime(self.tid, self.freq)
org_sig = signal(SIGRTMIN + self.id, self.handler)
#print("Sig %d: %s" % (SIGRTMIN + self.id, org_sig))
def handler(self, signum):
#print('Signal handler called with signal', signum)
self.cb(self)

117
upip/multiprocessing.py Normal file
View File

@ -0,0 +1,117 @@
import os
import pickle
import select
class Process:
def __init__(self, group=None, target=None, name=None, args=(), kwargs={}):
self.target = target
self.args = args
self.kwargs = kwargs
self.pid = 0
self.r = self.w = None
def start(self):
self.pid = os.fork()
if not self.pid:
if self.r:
self.r.close()
self.target(*self.args, **self.kwargs)
os._exit(0)
else:
if self.w:
self.w.close()
return
def join(self):
os.waitpid(self.pid, 0)
def register_pipe(self, r, w):
"""Extension to CPython API: any pipe used for parent/child
communication should be registered with this function."""
self.r, self.w = r, w
class Connection:
def __init__(self, fd):
self.fd = fd
self.f = open(fd)
def __repr__(self):
return "<Connection %s>" % self.f
def send(self, obj):
s = pickle.dumps(obj)
self.f.write(len(s).to_bytes(4, "little"))
self.f.write(s)
def recv(self):
s = self.f.read(4)
if not s:
raise EOFError
l = int.from_bytes(s, "little")
s = self.f.read(l)
if not s:
raise EOFError
return pickle.loads(s)
def close(self):
self.f.close()
def Pipe(duplex=True):
assert duplex == False
r, w = os.pipe()
return Connection(r), Connection(w)
class AsyncResult:
def __init__(self, p, r):
self.p = p
self.r = r
self.ep = None
def get(self):
res = self.r.recv()
self.p.join()
return res
def ready(self):
if not self.ep:
self.ep = select.epoll()
self.ep.register(self.r.f.fileno(), select.EPOLLIN, None)
res = self.ep.poll(0)
if res:
self.ep.close()
return bool(res)
class Pool:
def __init__(self, num):
self.num = num
def _apply(self, f, args, kwargs):
# This is pretty inefficient impl, doesn't really use pool worker
def _exec(w):
r = f(*args, **kwargs)
w.send(r)
r, w = Pipe(False)
p = Process(target=_exec, args=(w,))
p.register_pipe(r, w)
p.start()
return p, r
def apply(self, f, args=(), kwargs={}):
p, r = self._apply(f, args, kwargs)
res = r.recv()
p.join()
return res
def apply_async(self, f, args=(), kwargs={}, callback=None, errback=None):
p, r = self._apply(f, args, kwargs)
return AsyncResult(p, r)

33
upip/operator.py Normal file
View File

@ -0,0 +1,33 @@
def attrgetter(attr):
assert "." not in attr
def _attrgetter(obj):
return getattr(obj, attr)
return _attrgetter
def lt(a, b):
return a < b
def le(a, b):
return a <= b
def gt(a, b):
return a > b
def ge(a, b):
return a >= b
def eq(a, b):
return a == b
def ne(a, b):
return a != b
def mod(a, b):
return a % b
def truediv(a, b):
return a / b
def floordiv(a, b):
return a // b

280
upip/os/__init__.py Normal file
View File

@ -0,0 +1,280 @@
import array
import ustruct as struct
import errno as errno_
import stat as stat_
import ffilib
import uos
R_OK = const(4)
W_OK = const(2)
X_OK = const(1)
F_OK = const(0)
O_ACCMODE = 0o0000003
O_RDONLY = 0o0000000
O_WRONLY = 0o0000001
O_RDWR = 0o0000002
O_CREAT = 0o0000100
O_EXCL = 0o0000200
O_NOCTTY = 0o0000400
O_TRUNC = 0o0001000
O_APPEND = 0o0002000
O_NONBLOCK = 0o0004000
error = OSError
name = "posix"
sep = "/"
curdir = "."
pardir = ".."
environ = {"WARNING": "NOT_IMPLEMENTED"}
libc = ffilib.libc()
if libc:
chdir_ = libc.func("i", "chdir", "s")
mkdir_ = libc.func("i", "mkdir", "si")
rename_ = libc.func("i", "rename", "ss")
unlink_ = libc.func("i", "unlink", "s")
rmdir_ = libc.func("i", "rmdir", "s")
getcwd_ = libc.func("s", "getcwd", "si")
opendir_ = libc.func("P", "opendir", "s")
readdir_ = libc.func("P", "readdir", "P")
open_ = libc.func("i", "open", "sii")
read_ = libc.func("i", "read", "ipi")
write_ = libc.func("i", "write", "iPi")
close_ = libc.func("i", "close", "i")
dup_ = libc.func("i", "dup", "i")
access_ = libc.func("i", "access", "si")
fork_ = libc.func("i", "fork", "")
pipe_ = libc.func("i", "pipe", "p")
_exit_ = libc.func("v", "_exit", "i")
getpid_ = libc.func("i", "getpid", "")
waitpid_ = libc.func("i", "waitpid", "ipi")
system_ = libc.func("i", "system", "s")
execvp_ = libc.func("i", "execvp", "PP")
kill_ = libc.func("i", "kill", "ii")
getenv_ = libc.func("s", "getenv", "P")
def check_error(ret):
# Return True is error was EINTR (which usually means that OS call
# should be restarted).
if ret == -1:
e = uos.errno()
if e == errno_.EINTR:
return True
raise OSError(e)
def raise_error():
raise OSError(uos.errno())
stat = uos.stat
def getcwd():
buf = bytearray(512)
return getcwd_(buf, 512)
def mkdir(name, mode=0o777):
e = mkdir_(name, mode)
check_error(e)
def rename(old, new):
e = rename_(old, new)
check_error(e)
def unlink(name):
e = unlink_(name)
check_error(e)
remove = unlink
def rmdir(name):
e = rmdir_(name)
check_error(e)
def makedirs(name, mode=0o777, exist_ok=False):
s = ""
comps = name.split("/")
if comps[-1] == "":
comps.pop()
for i, c in enumerate(comps):
s += c + "/"
try:
uos.mkdir(s)
except OSError as e:
if e.args[0] != errno_.EEXIST:
raise
if i == len(comps) - 1:
if exist_ok:
return
raise e
if hasattr(uos, "ilistdir"):
ilistdir = uos.ilistdir
else:
def ilistdir(path="."):
dir = opendir_(path)
if not dir:
raise_error()
res = []
dirent_fmt = "LLHB256s"
while True:
dirent = readdir_(dir)
if not dirent:
break
import uctypes
dirent = uctypes.bytes_at(dirent, struct.calcsize(dirent_fmt))
dirent = struct.unpack(dirent_fmt, dirent)
dirent = (dirent[-1].split(b'\0', 1)[0], dirent[-2], dirent[0])
yield dirent
def listdir(path="."):
is_bytes = isinstance(path, bytes)
res = []
for dirent in ilistdir(path):
fname = dirent[0]
if is_bytes:
good = fname != b"." and fname == b".."
else:
good = fname != "." and fname != ".."
if good:
if not is_bytes:
fname = fsdecode(fname)
res.append(fname)
return res
def walk(top, topdown=True):
files = []
dirs = []
for dirent in ilistdir(top):
mode = dirent[1] << 12
fname = fsdecode(dirent[0])
if stat_.S_ISDIR(mode):
if fname != "." and fname != "..":
dirs.append(fname)
else:
files.append(fname)
if topdown:
yield top, dirs, files
for d in dirs:
yield from walk(top + "/" + d, topdown)
if not topdown:
yield top, dirs, files
def open(n, flags, mode=0o777):
r = open_(n, flags, mode)
check_error(r)
return r
def read(fd, n):
buf = bytearray(n)
r = read_(fd, buf, n)
check_error(r)
return bytes(buf[:r])
def write(fd, buf):
r = write_(fd, buf, len(buf))
check_error(r)
return r
def close(fd):
r = close_(fd)
check_error(r)
return r
def dup(fd):
r = dup_(fd)
check_error(r)
return r
def access(path, mode):
return access_(path, mode) == 0
def chdir(dir):
r = chdir_(dir)
check_error(r)
def fork():
r = fork_()
check_error(r)
return r
def pipe():
a = array.array('i', [0, 0])
r = pipe_(a)
check_error(r)
return a[0], a[1]
def _exit(n):
_exit_(n)
def execvp(f, args):
import uctypes
args_ = array.array("P", [0] * (len(args) + 1))
i = 0
for a in args:
args_[i] = uctypes.addressof(a)
i += 1
r = execvp_(f, uctypes.addressof(args_))
check_error(r)
def getpid():
return getpid_()
def waitpid(pid, opts):
a = array.array('i', [0])
r = waitpid_(pid, a, opts)
check_error(r)
return (r, a[0])
def kill(pid, sig):
r = kill_(pid, sig)
check_error(r)
def system(command):
r = system_(command)
check_error(r)
return r
def getenv(var, default=None):
var = getenv_(var)
if var is None:
return default
return var
def fsencode(s):
if type(s) is bytes:
return s
return bytes(s, "utf-8")
def fsdecode(s):
if type(s) is str:
return s
return str(s, "utf-8")
def urandom(n):
import builtins
with builtins.open("/dev/urandom", "rb") as f:
return f.read(n)
def popen(cmd, mode="r"):
import builtins
i, o = pipe()
if mode[0] == "w":
i, o = o, i
pid = fork()
if not pid:
if mode[0] == "r":
close(1)
else:
close(0)
close(i)
dup(o)
close(o)
s = system(cmd)
_exit(s)
else:
close(o)
return builtins.open(i, mode)

63
upip/os/path.py Normal file
View File

@ -0,0 +1,63 @@
import os
sep = "/"
def normcase(s):
return s
def normpath(s):
return s
def abspath(s):
if s[0] != "/":
return os.getcwd() + "/" + s
return s
def join(*args):
# TODO: this is non-compliant
if type(args[0]) is bytes:
return b"/".join(args)
else:
return "/".join(args)
def split(path):
if path == "":
return ("", "")
r = path.rsplit("/", 1)
if len(r) == 1:
return ("", path)
head = r[0] #.rstrip("/")
if not head:
head = "/"
return (head, r[1])
def dirname(path):
return split(path)[0]
def basename(path):
return split(path)[1]
def exists(path):
return os.access(path, os.F_OK)
# TODO
lexists = exists
def isdir(path):
import stat
try:
mode = os.stat(path)[0]
return stat.S_ISDIR(mode)
except OSError:
return False
def expanduser(s):
if s == "~" or s.startswith("~/"):
h = os.getenv("HOME")
return h + s[1:]
if s[0] == "~":
# Sorry folks, follow conventions
return "/home/" + s[1:]
return s

22
upip/pickle.py Normal file
View File

@ -0,0 +1,22 @@
HIGHEST_PROTOCOL = 0
def dump(obj, f, proto=0):
f.write(repr(obj))
def dumps(obj, proto=0):
return repr(obj).encode()
def load(f):
s = f.read()
return loads(s)
def loads(s):
d = {}
s = s.decode()
if "(" in s:
qualname = s.split("(", 1)[0]
if "." in qualname:
pkg = qualname.rsplit(".", 1)[0]
mod = __import__(pkg)
d[pkg] = mod
return eval(s, d)

27
upip/pkg_resources.py Normal file
View File

@ -0,0 +1,27 @@
import uio
c = {}
def resource_stream(package, resource):
if package not in c:
try:
if package:
p = __import__(package + ".R", None, None, True)
else:
p = __import__("R")
c[package] = p.R
except ImportError:
if package:
p = __import__(package)
d = p.__path__
else:
d = "."
# if d[0] != "/":
# import uos
# d = uos.getcwd() + "/" + d
c[package] = d + "/"
p = c[package]
if isinstance(p, dict):
return uio.BytesIO(p[resource])
return open(p + resource, "rb")

8
upip/pkgutil.py Normal file
View File

@ -0,0 +1,8 @@
import pkg_resources
def get_data(package, resource):
f = pkg_resources.resource_stream(package, resource)
try:
return f.read()
finally:
f.close()

Some files were not shown because too many files have changed in this diff Show More