URI: 
       tImplement reading, merging and writing of Packages files - amprolla - devuan's apt repo merger
  HTML git clone git://parazyd.org/amprolla.git
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
   DIR commit 742e8d0e7f1aede6dac09ed92cba828378dfab24
   DIR parent 1226778224578a31af771a5879d90135aa2fb1ac
  HTML Author: Merlijn Wajer <merlijn@wizzup.org>
       Date:   Fri, 26 May 2017 00:03:25 +0200
       
       Implement reading, merging and writing of Packages files
       
       Diffstat:
         A amprolla                            |      42 +++++++++++++++++++++++++++++++
         M lib/config.py                       |     130 +++++++++++++++----------------
         D lib/delta.py                        |     108 -------------------------------
         A lib/package.py                      |      98 +++++++++++++++++++++++++++++++
         A lib/parse.py                        |     137 +++++++++++++++++++++++++++++++
       
       5 files changed, 341 insertions(+), 174 deletions(-)
       ---
   DIR diff --git a/amprolla b/amprolla
       t@@ -0,0 +1,42 @@
       +#!/usr/bin/env python3
       +
       +from os.path import join
       +from time import time
       +
       +from lib.package import (write_packages, load_packages_file,
       +        merge_packages, merge_packages_many)
       +from lib.parse import parse_release
       +from lib.config import banpkgs
       +
       +roots = {
       +    'devuan': 'spool/devuan/dists/jessie',
       +    'debian': 'spool/debian/dists/jessie',
       +    'debian-sec': 'spool/dists/jessie/updates/',
       +}
       +
       +#devuan_release_contents = open(join(roots['devuan'], 'Release')).read()
       +#debian_release_contents = open(join(roots['debian'], 'Release')).read()
       +#devuan_release = parse_release(devuan_release_contents)
       +#debian_release = parse_release(debian_release_contents)
       +#devuan_files = list(filter(lambda x: x.endswith('Packages.gz') and 'armhf' in x, devuan_release.keys()))
       +#debian_files = list(filter(lambda x: x.endswith('Packages.gz') and 'armhf' in x, debian_release.keys()))
       +
       +packages_file = 'main/binary-armhf/Packages.gz'
       +
       +t1 = time()
       +print('Loading packages')
       +
       +devuan = load_packages_file(join(roots['devuan'], packages_file))
       +debian = load_packages_file(join(roots['debian'], packages_file))
       +debian_sec = load_packages_file(join(roots['debian-sec'], packages_file))
       +
       +all_repos = [devuan, debian_sec, debian]
       +
       +print('Merging packages')
       +new_pkgs = merge_packages_many(all_repos, banned_packages=banpkgs)
       +
       +print('Writing packages')
       +write_packages(new_pkgs, 'Packages.merged')
       +
       +t2 = time()
       +print('time:', t2-t1)
   DIR diff --git a/lib/config.py b/lib/config.py
       t@@ -2,14 +2,12 @@
        # copyright (c) 2017 - Ivan J. <parazyd@dyne.org>
        # see LICENSE file for copyright and license details
        
       -amprolla = {
       -    "spooldir": "./spool",
       -    "sign_key": "fa1b0274",
       -    "mergedir": "./merged",
       -    "mergedsubdirs": ["dists", "pool"],
       -    "banpkgs": ['systemd', 'systemd-sysv']
       -    #"checksums": [ 'md5sum', 'sha1', 'sha256', 'sha512' ]
       -}
       +spooldir = "./spool"
       +sign_key = "fa1b0274"
       +mergedir = "./merged"
       +mergedsubdirs = ["dists", "pool"]
       +banpkgs = {'systemd', 'systemd-sysv'}
       +#checksums = [ 'md5sum', 'sha1', 'sha256', 'sha512' ]
        
        repos = {
            # key name is priority, first is 0
       t@@ -136,63 +134,63 @@ mainrepofiles = [
            "Release.gpg"
        ]
        
       -pkgfmt = [
       -    'Package:',
       -    'Version:',
       -    'Essential:',
       -    'Installed-Size:',
       -    'Maintainer:',
       -    'Architecture:',
       -    'Replaces:',
       -    'Provides:',
       -    'Depends:',
       -    'Conflicts:',
       -    'Pre-Depends:',
       -    'Breaks:',
       -    'Homepage:',
       -    'Apport:',
       -    'Auto-Built-Package:',
       +packages_keys = [
       +    'Package',
       +    'Version',
       +    'Essential',
       +    'Installed-Size',
       +    'Maintainer',
       +    'Architecture',
       +    'Replaces',
       +    'Provides',
       +    'Depends',
       +    'Conflicts',
       +    'Pre-Depends',
       +    'Breaks',
       +    'Homepage',
       +    'Apport',
       +    'Auto-Built-Package',
            'Build-Ids',
       -    'Origin:',
       -    'Bugs:',
       -    'Built-Using:',
       -    'Enhances:',
       -    'Recommends:',
       -    'Description:',
       -    'Description-md5:',
       -    'Ghc-Package:',
       -    'Gstreamer-Decoders:',
       -    'Gstreamer-Elements:',
       -    'Gstreamer-Encoders:',
       -    'Gstreamer-Uri-Sinks:',
       -    'Gstreamer-Uri-Sources:',
       -    'Gstreamer-Version:',
       -    'Lua-Versions:',
       -    'Modaliases:',
       -    'Npp-Applications:',
       -    'Npp-Description:',
       -    'Npp-File:',
       -    'Npp-Mimetype:',
       -    'Npp-Name:',
       -    'Origin:',
       -    'Original-Maintainer:',
       -    'Original-Source-Maintainer:',
       -    'Package-Type:',
       -    'Postgresql-Version:',
       -    'Python-Version:',
       -    'Python-Versions:',
       -    'Ruby-Versions:',
       -    'Source:',
       -    'Suggests:',
       -    'Xul-Appid:',
       -    'Multi-Arch:',
       -    'Build-Essential:',
       -    'Tag:',
       -    'Section:',
       -    'Priority:',
       -    'Filename:',
       -    'Size:',
       -    'MD5sum:',
       -    'SHA1:',
       -    'SHA256:'
       +    'Origin',
       +    'Bugs',
       +    'Built-Using',
       +    'Enhances',
       +    'Recommends',
       +    'Description',
       +    'Description-md5',
       +    'Ghc-Package',
       +    'Gstreamer-Decoders',
       +    'Gstreamer-Elements',
       +    'Gstreamer-Encoders',
       +    'Gstreamer-Uri-Sinks',
       +    'Gstreamer-Uri-Sources',
       +    'Gstreamer-Version',
       +    'Lua-Versions',
       +    'Modaliases',
       +    'Npp-Applications',
       +    'Npp-Description',
       +    'Npp-File',
       +    'Npp-Mimetype',
       +    'Npp-Name',
       +    'Origin',
       +    'Original-Maintainer',
       +    'Original-Source-Maintainer',
       +    'Package-Type',
       +    'Postgresql-Version',
       +    'Python-Version',
       +    'Python-Versions',
       +    'Ruby-Versions',
       +    'Source',
       +    'Suggests',
       +    'Xul-Appid',
       +    'Multi-Arch',
       +    'Build-Essential',
       +    'Tag',
       +    'Section',
       +    'Priority',
       +    'Filename',
       +    'Size',
       +    'MD5sum',
       +    'SHA1',
       +    'SHA256'
        ]
   DIR diff --git a/lib/delta.py b/lib/delta.py
       t@@ -1,108 +0,0 @@
       -#!/usr/bin/env python
       -# copyright (c) 2017 - Ivan J. <parazyd@dyne.org>
       -# see LICENSE file for copyright and license details
       -
       -import ast
       -import gzip
       -import re
       -import requests
       -import time
       -
       -import config
       -from log import notice
       -
       -
       -def get_time(date):
       -    return time.mktime(time.strptime(date, "%a, %d %b %Y %H:%M:%S %Z"))
       -
       -
       -def get_date(relfile):
       -    match = re.search('Date: .+', relfile)
       -    if match:
       -        line = relfile[match.start():match.end()]
       -        relfile = line.split(': ')[1]
       -    return relfile
       -
       -
       -def parse_release(reltext):
       -    hash = {}
       -    match = re.search('SHA256:+', reltext)
       -    if match:
       -        line = reltext[match.start():-1]
       -        for i in line.split('\n'):
       -            if i == 'SHA256:' or i == '\n':  # XXX: hack
       -                continue
       -            hash[(i.split()[2])] = i.split()[0]
       -        return hash
       -
       -
       -def parse_package(entry):
       -    # for parsing a single package
       -    values = re.split('\\n[A-Z].+?:', entry)[0:]
       -    values[0] = values[0].split(':')[1]
       -    keys = re.findall('\\n[A-Z].+?:', '\n' + entry)
       -    both = zip(keys, values)
       -    return {key.lstrip(): value for key, value in both}
       -
       -
       -def parse_packages(pkgtext):
       -    # this parses our package file into a hashmap
       -    # key: package name, value: entire package paragraph as a hashmap
       -    map = {}
       -
       -    # TODO: consider also this approach
       -    # def parse_packages(pkgfilepath):
       -    # with gzip.open(pkgfilepath, "rb") as f:
       -    #    pkgs = f.read().split("\n\n")
       -
       -    pkgs = pkgtext.split("\n\n")
       -    for pkg in pkgs:
       -        m = re.match('Package: .+', pkg)
       -        if m:
       -            line = pkg[m.start():m.end()]
       -            key = line.split(': ')[1]
       -            map[key] = parse_package(pkg)
       -    return map
       -
       -
       -def print_package(map, pkgname):
       -    try:
       -        pkg = ast.literal_eval(map[pkgname])
       -        sin = []
       -        for i in config.pkgfmt:
       -            if config.pkgfmt[i] in pkg.keys():
       -                sin.append(config.pkgfmt[i] + pkg[config.pkgfmt[i]])
       -        return sin
       -    except:
       -        log.die("nonexistent package")
       -
       -
       -def compare_dict(d1, d2):
       -    d1_keys = set(d1.keys())
       -    d2_keys = set(d2.keys())
       -    intersect_keys = d1_keys.intersection(d2_keys)
       -    modified = {o: (d1[o], d2[o]) for o in intersect_keys if d1[o] != d2[o]}
       -    return modified
       -
       -
       -def compare_release(oldrel, newrel):
       -    r = requests.get(newrel)
       -    new = r.text
       -    with open(oldrel, "rb") as f:
       -        old = f.read()
       -
       -    oldtime = get_time(get_date(old))
       -    newtime = get_time(get_date(new))
       -    if newtime > oldtime:
       -        notice("Update available")
       -        newhashes = parse_release(new)
       -        oldhashes = parse_release(old)
       -        changes = compare_dict(newhashes, oldhashes)
       -        # k = pkg name, v = sha256
       -        return changes
       -
       -
       -# relmap = compare_release("../spool/dists/jessie/updates/Release", "http://security.debian.org/dists/jessie/updates/Release")
       -# print relmap
       -# for k,v in relmap.iteritems():
       -#    print(k)
   DIR diff --git a/lib/package.py b/lib/package.py
       t@@ -0,0 +1,98 @@
       +from gzip import open as gzip_open
       +
       +from lib.parse import (parse_packages, parse_dependencies)
       +from lib.config import packages_keys
       +
       +def write_packages(packages, filename, sort=False):
       +    """
       +    Writes `packages` to a file (per debian Packages format)
       +    If sort=True, the packages are sorted by name.
       +    """
       +    f = open(filename, 'w+')
       +
       +    pkg_items = packages.items()
       +    if sort:
       +        pkg_items = sorted(pkg_items, key=lambda x: x[0])
       +
       +    for pkg_name, pkg_contents in pkg_items:
       +        for key in packages_keys:
       +            if key in pkg_contents:
       +                f.write('%s: %s\n' % (key, pkg_contents[key]))
       +        f.write('\n')
       +
       +    f.close()
       +
       +def load_packages_file(filename):
       +    """ Load a gzip'd Packages file.
       +    Returns a dictionary mapping each package name to its key-value fields.
       +    """
       +    packages_contents = gzip_open(filename).read()
       +    packages_contents = packages_contents.decode('utf-8')
       +    return parse_packages(packages_contents)
       +
       +
       +def package_banned(pkg, banned_pkgs):
       +    """
       +    Returns True if the package is banned or contains a banned dependency.
       +    Currently checks and parses both the 'Depends' and the 'Pre-Depends' fields
       +    of the package.
       +    """
       +    if pkg.get('Package') in banned_pkgs:
       +        return True
       +
       +    depends = parse_dependencies(pkg.get('Depends', ''))
       +    pre_depends = parse_dependencies(pkg.get('Pre-Depends', ''))
       +
       +    depends = [v[0] for v in depends]
       +    pre_depends = [v[0] for v in pre_depends]
       +
       +    deps = set(depends).union(set(pre_depends))
       +
       +    return bool(deps.intersection(banned_pkgs))
       +
       +
       +def merge_packages(pkg1, pkg2, banned_packages=set()):
       +    """
       +    Merges two previously loaded/parsed (using load_packages_file) packages
       +    dictionaries, preferring `pkg1` over `pkg2`, and optionally discarding any
       +    banned packages.
       +    """
       +    new_pkgs = {}
       +    package_names = set(pkg1.keys()).union(set(pkg2.keys()))
       +
       +    for pkg in package_names:
       +        pkg1_pkg = pkg1.get(pkg)
       +        pkg2_pkg = pkg2.get(pkg)
       +
       +        if pkg1_pkg and pkg2_pkg:
       +            new_pkgs[pkg] = pkg1_pkg
       +        elif pkg1_pkg:
       +            if not package_banned(pkg1_pkg, banned_packages):
       +                new_pkgs[pkg] = pkg1_pkg
       +        elif pkg2_pkg:
       +            if not package_banned(pkg2_pkg, banned_packages):
       +                new_pkgs[pkg] = pkg2_pkg
       +        else:
       +            assert False, 'Impossibru'
       +
       +    return new_pkgs
       +
       +def merge_packages_many(packages, banned_packages=set()): # TODO: Make generic
       +    """
       +    Merges two (or more) previously loaded/parsed (using load_packages_file)
       +    packages dictionaries, priority is defined by the order of the `packages`
       +    list, optionally discarding any banned packages.
       +    """
       +    assert len(packages) > 1
       +
       +    new_pkgs = {}
       +
       +    pkg1 = packages[0]
       +    pkg2 = packages[1]
       +
       +    new_pkgs = merge_packages(pkg1, pkg2, banned_packages=banned_packages)
       +
       +    for pkg in packages[2:]:
       +        new_pkgs = merge_packages(new_pkgs, pkg, banned_packages=banned_packages)
       +
       +    return new_pkgs
   DIR diff --git a/lib/parse.py b/lib/parse.py
       t@@ -0,0 +1,137 @@
       +#!/usr/bin/env python
       +# copyright (c) 2017 - Ivan J. <parazyd@dyne.org>
       +# see LICENSE file for copyright and license details
       +
       +import ast
       +import gzip
       +import re
       +#import requests
       +import time
       +
       +from . import config
       +from .log import notice
       +
       +
       +def get_time(date):
       +    return time.mktime(time.strptime(date, "%a, %d %b %Y %H:%M:%S %Z"))
       +
       +
       +def get_date(relfile):
       +    match = re.search('Date: .+', relfile)
       +    if match:
       +        line = relfile[match.start():match.end()]
       +        relfile = line.split(': ')[1]
       +    return relfile
       +
       +
       +def parse_release(reltext):
       +    _hash = {}
       +    match = re.search('SHA256:+', reltext)
       +    if match:
       +        line = reltext[match.start():-1]
       +        for i in line.split('\n'):
       +            if i == 'SHA256:' or i == '\n':  # XXX: hack
       +                continue
       +            _hash[(i.split()[2])] = i.split()[0]
       +        return _hash
       +
       +PACKAGES_REGEX = re.compile('([A-Za-z0-9\-]+): ')
       +
       +def parse_package(entry):
       +    """ Parses a single Packages entry """
       +    contents = PACKAGES_REGEX.split(entry)[1:]  # Throw away the first ''
       +
       +    keys = contents[::2]
       +    vals = map(lambda x: x.strip(), contents[1::2])
       +
       +    return dict(zip(keys, vals))
       +
       +
       +def parse_packages(pkgtext):
       +    # this parses our package file into a hashmap
       +    # key: package name, value: entire package paragraph as a hashmap
       +    map = {}
       +
       +    pkgs = pkgtext.split("\n\n")
       +    for pkg in pkgs:
       +        m = re.match('Package: .+', pkg)
       +        if m:
       +            line = pkg[m.start():m.end()]
       +            key = line.split(': ')[1]
       +            map[key] = parse_package(pkg)
       +
       +    return map
       +
       +def parse_dependencies(dependencies):
       +    """
       +    Parses a dependency line from a debian Packages file.
       +
       +    Example line::
       +
       +        'lib6 (>= 2.4), libdbus-1-3 (>= 1.0.2), foo'
       +
       +    Output::
       +
       +        {'lib6': '(>= 2.4)', 'libdbus-1-3': '(>= 1.0.2)', 'foo': None}
       +    """
       +    r = {}
       +
       +    for pkg_plus_version in dependencies.split(', '):
       +        v = pkg_plus_version.split(' ', 1)
       +        name = v[0]
       +
       +        # If we get passed an empty string, the name is '', and we just outright
       +        # stop
       +        if not name:
       +            return {}
       +
       +        if len(v) == 2:
       +            version = v[1]
       +            r[name] = version
       +        else:
       +            r[name] = None
       +
       +    return r
       +
       +
       +def print_package(map, pkgname):
       +    try:
       +        pkg = ast.literal_eval(map[pkgname])
       +        sin = []
       +        for i in config.pkgfmt:
       +            if config.pkgfmt[i] in pkg.keys():
       +                sin.append(config.pkgfmt[i] + pkg[config.pkgfmt[i]])
       +        return sin
       +    except:
       +        log.die("nonexistent package")
       +
       +
       +def compare_dict(d1, d2):
       +    d1_keys = set(d1.keys())
       +    d2_keys = set(d2.keys())
       +    intersect_keys = d1_keys.intersection(d2_keys)
       +    modified = {o: (d1[o], d2[o]) for o in intersect_keys if d1[o] != d2[o]}
       +    return modified
       +
       +
       +def compare_release(oldrel, newrel):
       +    r = requests.get(newrel)
       +    new = r.text
       +    with open(oldrel, "rb") as f:
       +        old = f.read()
       +
       +    oldtime = get_time(get_date(old))
       +    newtime = get_time(get_date(new))
       +    if newtime > oldtime:
       +        notice("Update available")
       +        newhashes = parse_release(new)
       +        oldhashes = parse_release(old)
       +        changes = compare_dict(newhashes, oldhashes)
       +        # k = pkg name, v = sha256
       +        return changes
       +
       +
       +# relmap = compare_release("../spool/dists/jessie/updates/Release", "http://security.debian.org/dists/jessie/updates/Release")
       +# print relmap
       +# for k,v in relmap.iteritems():
       +#    print(k)