Difference between revisions of "CacheClean"

From ArchWiki
Jump to: navigation, search
(Fixed a bug where the script crashed because it tried to delete a directory (I have no clue why though) + minor style fixes)
(fixed based on kachelaqa's code in https://bbs.archlinux.org/viewtopic.php?pid=977459)
Line 8: Line 8:
  
  
<pre>
+
<pre>#!/usr/bin/env python
#!/usr/bin/env python
+
 
"""cache_clean - a simple python script to clean up the /var/cache/pacman/pkg directory.
 
"""cache_clean - a simple python script to clean up the /var/cache/pacman/pkg directory.
 
More versatile than 'pacman -Sc' in that you can select how many old versions
 
More versatile than 'pacman -Sc' in that you can select how many old versions
 
to keep.
 
to keep.
 
Usage: cache_clean {-p} {-v} <# of copies to keep>
 
Usage: cache_clean {-p} {-v} <# of copies to keep>
  # of copies to keep - (required) how many generations of each package to keep
+
# of copies to keep - (required) how many generations of each package to keep
  -p - (optional) preview what would be deleted; forces verbose (-v) mode.
+
-p - (optional) preview what would be deleted; forces verbose (-v) mode.
  -v - (optional) show deleted packages."""
+
-v - (optional) show deleted packages."""
 +
 
 
# Note that the determination of package age is done by simply looking at the date-time
 
# Note that the determination of package age is done by simply looking at the date-time
 
# modified stamp on the file. There is just enough variation in the way package version
 
# modified stamp on the file. There is just enough variation in the way package version
Line 29: Line 29:
 
# helper function to get tuple of (file dtm, file name, file sz)     
 
# helper function to get tuple of (file dtm, file name, file sz)     
 
def fn_stats(fn):
 
def fn_stats(fn):
  s = os.stat(fn)
+
s = os.stat(fn)
  return (s[8], fn, s[6])
+
return (s[8], fn, s[6])
  
 
# cleanup does the actual pkg file deletion
 
# cleanup does the actual pkg file deletion
 
def cleanup(run_list):
 
def cleanup(run_list):
  # strictly speaking only the first two of these globals need to be listed.
+
# strictly speaking only the first two of these globals need to be listed.
  global n_deleted, bytes_deleted, opt_verbose, opt_preview, n_to_keep
+
global n_deleted, bytes_deleted, opt_verbose, opt_preview, n_to_keep
  # return if the run_list is too short
+
# return if the run_list is too short
  #print run_list
+
#print run_list
  #return
+
#return
  if len(run_list) <= n_to_keep: return
+
if len(run_list) <= n_to_keep: return
  # Build list of tuples (date-time file modified, file name, file size)
+
# Build list of tuples (date-time file modified, file name, file size)
  dtm_list = [fn_stats(tfn) for tfn in run_list]
+
dtm_list = [fn_stats(tfn) for tfn in run_list]
  # Sort the list by date-time
+
# Sort the list by date-time
  dtm_list.sort()
+
dtm_list.sort()
  # Build list of the filenames to delete (all but last n_to_keep).
+
# Build list of the filenames to delete (all but last n_to_keep).
  # <showing_off>
+
# <showing_off>
  #kill_list = [tfn[1] for tfn in dtm_list[:-n_to_keep]]
+
#kill_list = [tfn[1] for tfn in dtm_list[:-n_to_keep]]
  #bytes_deleted = sum(x[2] for x in dtm_list[:-n_to_keep])
+
#bytes_deleted = sum(x[2] for x in dtm_list[:-n_to_keep])
  # </showing_off>
+
# </showing_off>
  kill_list = []
+
kill_list = []
  for x in dtm_list[:-n_to_keep]:
+
for x in dtm_list[:-n_to_keep]:
    if os.path.isfile(x[1]):
+
if os.path.isfile(x[1]):
      kill_list.append(x[1])
+
kill_list.append(x[1])
      bytes_deleted += x[2]
+
bytes_deleted += x[2]
  if opt_verbose and kill_list: print (kill_list)
+
if opt_verbose and kill_list: print (kill_list)
  n_deleted += len(kill_list)
+
n_deleted += len(kill_list)
  # and finally delete (if not in preview mode)
+
# and finally delete (if not in preview mode)
  if not opt_preview:
+
if not opt_preview:
    for dfn in kill_list:
+
for dfn in kill_list:
      os.unlink(dfn)
+
os.unlink(dfn)
  
 
######################################################################
 
######################################################################
Line 66: Line 66:
 
# process command line options
 
# process command line options
 
try:
 
try:
  opts, pargs = getopt.getopt(sys.argv[1:], 'vp')
+
opts, pargs = getopt.getopt(sys.argv[1:], 'vp')
  opt_dict = dict(opts)
+
opt_dict = dict(opts)
  opt_preview = '-p' in opt_dict
+
opt_preview = '-p' in opt_dict
  opt_verbose = '-v' in opt_dict
+
opt_verbose = '-v' in opt_dict
  if opt_preview: opt_verbose = True
+
if opt_preview: opt_verbose = True
  if len(pargs) == 1:
+
if len(pargs) == 1:
    n_to_keep = pargs[0]
+
n_to_keep = pargs[0]
  else:
+
else:
    raise getopt.GetoptError("missing required argument.")
+
raise getopt.GetoptError("missing required argument.")
  try:
+
try:
    n_to_keep = int(n_to_keep)
+
n_to_keep = int(n_to_keep)
    if n_to_keep <= 0: raise ValueError
+
if n_to_keep <= 0: raise ValueError
  except ValueError as e:
+
except ValueError as e:
    raise getopt.GetoptError("# of copies to keep must be numeric > 0!")
+
raise getopt.GetoptError("# of copies to keep must be numeric > 0!")
 
except getopt.GetoptError as msg:
 
except getopt.GetoptError as msg:
  print ("Error:",msg,"\n",__doc__)
+
print ("Error:",msg,"\n",__doc__)
  sys.exit(1)
+
sys.exit(1)
  
 
# change to the pkg directory & get a sorted list of its contents
 
# change to the pkg directory & get a sorted list of its contents
Line 91: Line 91:
 
# Pattern to use to extract the package name from the tar file name:
 
# Pattern to use to extract the package name from the tar file name:
 
# for pkg e.g. 'gnome-common-2.8.0-1-i686.pkg.tar.gz' group(1) is 'gnome-common'.
 
# for pkg e.g. 'gnome-common-2.8.0-1-i686.pkg.tar.gz' group(1) is 'gnome-common'.
bpat = re.compile(r'^(.+)-\d[^-]+-.+?(-i686|-x86_64|-any)?\.pkg\.tar\.(gz|bz2|xz)(\.aria2)?$')
+
 
 +
bpat = re.compile("""
 +
^([^-/][^/]*)-         # (1) package name
 +
[^-/\s]+-               # (2) epoch:version
 +
[^-/\s]+               # (3) release
 +
(-i686|-x86_64|-any)   # (4) architecture
 +
\.pkg\.tar             # (5) extension
 +
(?:\.(gz|bz2|xz|Z))?    # (6) compresssion extension
 +
(\.aria2)?$             # (7) other extension
 +
""", re.X)
  
 
n_deleted = 0
 
n_deleted = 0
Line 98: Line 107:
 
# now look for "runs" of the same package name differing only in version info.
 
# now look for "runs" of the same package name differing only in version info.
 
for run_end in range(len(pkg_fns)):
 
for run_end in range(len(pkg_fns)):
  fn = pkg_fns[run_end]
+
fn = pkg_fns[run_end]
  
  # make sure we skip directories
+
# make sure we skip directories
  if os.path.isfile(fn):
+
if os.path.isfile(fn):
    mo = bpat.match(fn) # test for a match of the package name pattern
+
mo = bpat.match(fn) # test for a match of the package name pattern
    if mo:
+
if mo:
      # print ("Processing file '" + fn + "' " + str(mo.lastindex), file=sys.stdout)
+
# print ("Processing file '" + fn + "' " + str(mo.lastindex), file=sys.stdout)
      tbase = mo.group(1) # gets the 'base' package name
+
tbase = mo.group(1) # gets the 'base' package name
      # include the architecture in the name if it's present
+
# include the architecture in the name if it's present
      if mo.lastindex > 1:
+
if mo.lastindex > 1:
        if mo.group(2) is not None:
+
if mo.group(2) is not None:
            tbase += mo.group(2)
+
tbase += mo.group(2)
      # print ('tbase: ' + tbase + '  ' + str(mo.lastindex), file=sys.stdout)
+
# print ('tbase: ' + tbase + '  ' + str(mo.lastindex), file=sys.stdout)
      # is it a new base name, i.e. the start of a new run?
+
# is it a new base name, i.e. the start of a new run?
      if tbase != pkg_base_nm: # if so then process the prior run
+
if tbase != pkg_base_nm: # if so then process the prior run
        if pkg_base_nm != '':
+
if pkg_base_nm != '':
          cleanup(pkg_fns[run_start:run_end])
+
cleanup(pkg_fns[run_start:run_end])
        pkg_base_nm = tbase # & setup for the new run
+
pkg_base_nm = tbase # & setup for the new run
        run_start = run_end
+
run_start = run_end
    else:
+
else:
      print ("File '"+fn+"' doesn't match package pattern!", file=sys.stderr)
+
print ("File '"+fn+"' doesn't match package pattern!", file=sys.stderr)
  else:
+
else:
    print ("skipping directory '" + fn + "'!", file=sys.stdout)
+
print ("skipping directory '" + fn + "'!", file=sys.stdout)
  
 
# catch the final run of the list
 
# catch the final run of the list
Line 127: Line 136:
  
 
if opt_verbose:
 
if opt_verbose:
  if opt_preview:
+
if opt_preview:
    print ("Preview mode (no files deleted):"),
+
print ("Preview mode (no files deleted):"),
  print (n_deleted,"files deleted,",bytes_deleted/1000,"kbytes.")
+
print (n_deleted,"files deleted,",bytes_deleted/1000,"kbytes.")</pre>
</pre>
+

Revision as of 15:04, 17 August 2011


This script has been included in the AUR at this page.

It was originally written by "alterkacker" - see his forum thread: A utility for cleaning /var/cache/pacman/pkg


#!/usr/bin/env python
"""cache_clean - a simple python script to clean up the /var/cache/pacman/pkg directory.
More versatile than 'pacman -Sc' in that you can select how many old versions
to keep.
Usage: cache_clean {-p} {-v} <# of copies to keep>
	# of copies to keep - (required) how many generations of each package to keep
	-p - (optional) preview what would be deleted; forces verbose (-v) mode.
	-v - (optional) show deleted packages."""

# Note that package age is determined simply from the date-time modified
# stamp on each file. Package version strings vary enough in format that
# comparing timestamps seemed simpler than parsing versions, and just as good.
# Also note that you must be root to run this script.

import getopt
import os
import re
import sys

# helper: describe a file as a sortable (mtime, name, size) tuple
def fn_stats(fn):
	"""Return (modification time, file name, size in bytes) for fn.

	The modification time comes first, so sorting a list of these tuples
	orders the files from oldest to newest.
	"""
	info = os.stat(fn)
	# stat_result slot 8 is st_mtime (whole seconds), slot 6 is st_size
	mtime, size = info[8], info[6]
	return (mtime, fn, size)

# cleanup prunes one run of package files down to the newest n_to_keep
def cleanup(run_list):
	"""Delete all but the newest n_to_keep files named in run_list.

	Reads the module globals opt_verbose, opt_preview and n_to_keep, and
	adds to n_deleted and bytes_deleted. In preview mode the counters are
	still updated but nothing is unlinked.
	"""
	# only the two counters are rebound here; the rest are listed for clarity
	global n_deleted, bytes_deleted, opt_verbose, opt_preview, n_to_keep
	# nothing to prune unless the run holds more entries than we keep
	if not len(run_list) > n_to_keep:
		return
	# oldest first: the tuples compare on modification time before name/size
	by_age = sorted(fn_stats(name) for name in run_list)
	# everything but the newest n_to_keep is a candidate; only regular
	# files are ever deleted
	doomed = [(name, size) for _, name, size in by_age[:-n_to_keep]
			if os.path.isfile(name)]
	kill_list = [name for name, _ in doomed]
	bytes_deleted += sum(size for _, size in doomed)
	if opt_verbose and kill_list:
		print (kill_list)
	n_deleted += len(kill_list)
	# preview mode stops short of touching the disk
	if opt_preview:
		return
	for victim in kill_list:
		os.unlink(victim)

######################################################################
# mainline processing

# parse the command line: optional -p / -v flags plus exactly one positive count
try:
	opts, pargs = getopt.getopt(sys.argv[1:], 'vp')
	opt_dict = dict(opts)
	opt_preview = '-p' in opt_dict
	# preview mode always reports what it would delete, so it implies verbose
	opt_verbose = opt_preview or '-v' in opt_dict
	if len(pargs) != 1:
		raise getopt.GetoptError("missing required argument.")
	try:
		n_to_keep = int(pargs[0])
	except ValueError:
		# non-numeric input is rejected by the same range check below
		n_to_keep = 0
	if n_to_keep <= 0:
		raise getopt.GetoptError("# of copies to keep must be numeric > 0!")
except getopt.GetoptError as msg:
	# any option problem: show the error followed by the usage text, then bail out
	print ("Error:",msg,"\n",__doc__)
	sys.exit(1)

# work from inside the package cache and take a name-ordered listing of it
os.chdir('/var/cache/pacman/pkg')
pkg_fns = sorted(os.listdir('.'))

# Pattern to use to extract the package name from the tar file name.
# Only the parenthesised parts are capture groups, so for a pkg such as
# 'gnome-common-2.8.0-1-i686.pkg.tar.gz' group(1) is 'gnome-common' and
# group(2) is '-i686'.
# The pattern is a raw string so that '\s' and '\.' reach the regex engine
# instead of being parsed as (invalid) string escape sequences.
bpat = re.compile(r"""
^([^-/][^/]*)-              # group 1: package name
[^-/\s]+-                   # epoch:version (not captured)
[^-/\s]+                    # release (not captured)
(-i686|-x86_64|-any)        # group 2: architecture
\.pkg\.tar                  # extension
(?:\.(gz|bz2|xz|zst|Z))?    # group 3: compression extension
(\.aria2)?$                 # group 4: other extension
""", re.X)

n_deleted = 0
bytes_deleted = 0

# Collect the package files into "runs": all files sharing a package name and
# architecture, differing only in version info. Grouping in a dict (rather than
# slicing the sorted listing) means that
#   * directories and non-matching files are never counted as package versions
#     and can never end up in a run handed to cleanup(),
#   * a package whose files are not adjacent in sort order is still one run,
#   * an empty cache directory, or one with no matching files, is handled.
runs = {}
for fn in pkg_fns:
	# make sure we skip directories
	if not os.path.isfile(fn):
		print ("skipping directory '" + fn + "'!", file=sys.stdout)
		continue
	mo = bpat.match(fn) # test for a match of the package name pattern
	if mo is None:
		print ("File '"+fn+"' doesn't match package pattern!", file=sys.stderr)
		continue
	# 'base' package name, with the architecture appended if it's present
	tbase = mo.group(1) + (mo.group(2) or '')
	runs.setdefault(tbase, []).append(fn)

# prune each run; sorted() keeps the processing order deterministic
for pkg_base_nm in sorted(runs):
	cleanup(runs[pkg_base_nm])

# final summary, shown only in verbose mode (which preview mode forces)
if opt_verbose:
	if opt_preview:
		# end=' ' keeps the totals on the same line as the banner; a trailing
		# comma after print(...) does not suppress the newline in Python 3
		print ("Preview mode (no files deleted):", end=' ')
	# integer division: report whole kbytes rather than a float
	print (n_deleted,"files deleted,",bytes_deleted//1000,"kbytes.")