URI: 
       tamprolla_merge_contents.py - amprolla - devuan's apt repo merger
  HTML git clone https://git.parazyd.org/amprolla
   DIR Log
   DIR Files
   DIR Refs
   DIR README
   DIR LICENSE
       ---
       tamprolla_merge_contents.py (3287B)
       ---
            1 #!/usr/bin/env python3
            2 # See LICENSE file for copyright and license details.
            3 
            4 """
            5 Amprolla module for merging Contents files
            6 """
            7 
            8 from gzip import open as gzip_open
            9 from multiprocessing import Pool
           10 from os import makedirs
           11 from os.path import dirname, join, isfile
           12 from time import time
           13 
           14 import lib.globalvars as globalvars
           15 from lib.config import (arches, categories, cpunm, mergedir, mergesubdir,
           16                         repos, spooldir)
           17 from lib.lock import check_lock, free_lock
           18 from lib.log import die, info
           19 from amprolla_merge import prepare_merge_dict
           20 
           21 
           def merge_contents(filelist):
               """
               Merges a list of Contents files and returns a dict of the merged files

               Every entry of filelist is either a path to a gzip-compressed, UTF-8
               encoded Contents file or a false value (e.g. None). False values and
               paths that are not existing regular files are skipped.

               The returned dict is keyed by the last whitespace-separated field of
               each data line. Each value is a list holding the remaining fields of
               those lines, joined by single spaces, in the order they were read.
               Entries coming from later files are appended to the same list, and
               duplicates are kept.
               """
               pkgs = {}
               for path in filelist:
                   if not path or not isfile(path):
                       continue

                   # Context manager so the handle is closed even if reading or
                   # decoding raises.
                   with gzip_open(path) as gzf:
                       contents = gzf.read().decode('utf-8').split('\n')

                   header = False
                   for line in contents:
                       # Everything from the explanatory preamble up to and including
                       # the "FILE ..." column heading is not data.
                       if line.startswith('This file maps each file'):
                           header = True
                       if line.startswith('FILE'):
                           header = False
                           continue

                       if header:
                           continue

                       sin = line.split()
                       if not sin:
                           # Empty or whitespace-only line: there is no last field to
                           # use as a key, so skip it instead of raising IndexError.
                           continue

                       pkgs.setdefault(sin[-1], []).append(' '.join(sin[:-1]))
               return pkgs
           47 
           48 
           def write_contents(pkgs, filename):
               """
               Writes a merged Contents dict to the given filename in gzip format

               pkgs maps a key to a list of strings. For every key, in sorted order,
               one UTF-8 encoded line "<string> <key>" is written per list element,
               keeping the list order. A single empty line terminates the output.

               The directory part of filename is created if it does not exist yet.
               """
               outdir = dirname(filename)
               if outdir:
                   # dirname() returns '' for a bare filename and makedirs('') raises
                   # FileNotFoundError, so only create a directory when one is named.
                   makedirs(outdir, exist_ok=True)

               # Context manager so the gzip stream is flushed and closed even if a
               # write or encode fails part-way through.
               with gzip_open(filename, 'w') as gzf:
                   for pkg, files in sorted(pkgs.items()):
                       for file in files:
                           line = "%s %s\n" % (file, pkg)
                           gzf.write(line.encode('utf-8'))

                   gzf.write(b'\n')
           63 
           64 
           def main_merge(contents_file):
               """
               Merges one Contents file for every suite and writes the result

               For each suite returned by prepare_merge_dict(), contents_file is
               joined onto every non-empty repo path of that suite (empty entries
               stay None), the resulting files are merged, and the merged dict is
               written below the merge directory. The output path is derived from
               the first candidate path.
               """
               merge_map = prepare_merge_dict()

               for suite in merge_map:
                   # Record which suite is currently being processed
                   globalvars.suite = suite

                   candidates = [join(repo, contents_file) if repo else None
                                 for repo in merge_map[suite]]

                   print("Merging contents: %s" % candidates)
                   merged = merge_contents(candidates)

                   # Swap the spool prefix of the first candidate for the merge prefix
                   spool_prefix = join(spooldir, repos['devuan']['dists'])
                   merge_prefix = join(mergedir, mergesubdir)
                   target = candidates[0].replace(spool_prefix, merge_prefix)

                   print("Writing contents: %s" % target)
                   write_contents(merged, target)
           89 
           90 
           def main():
               """
               Builds the list of Contents files and merges them in a process pool.

               One relative path is produced per (arch, category) pair, and each path
               is handed to main_merge by a pool of cpunm worker processes.
               """
               targets = []
               for arch in arches:
                   if arch == 'source':
                       basename = 'Contents-' + arch + '.gz'
                   else:
                       # e.g. 'binary-<x>' becomes 'Contents-<x>.gz'
                       basename = arch.replace('binary', 'Contents') + '.gz'

                   for category in categories:
                       targets.append(join(category, basename))

               workers = Pool(cpunm)
               workers.map(main_merge, targets)
               workers.close()
          106 
          107 
          if __name__ == '__main__':
              # Script entry point: take the lock, run the merge, release the lock
              # and log the elapsed wall-clock time. Any exception goes to die().
              try:
                  started = time()
                  check_lock()
                  main()
                  free_lock()
                  elapsed = time() - started
                  info('Total contents merge time: %s' % elapsed, tofile=True)
              except Exception as err:
                  die(err)