Difference between revisions of "User:Cactus/Aurscape"

From ArchWiki
Jump to: navigation, search
(6 intermediate revisions by 5 users not shown)
Line 1: Line 1:
 +
[[Category:Package management]]
 
[[Category:Scripts]]
 
[[Category:Scripts]]
 
+
{{Out of date}}
 
'''''aurscrape''''' ''Contributed by: Aaron Griffin''
 
'''''aurscrape''''' ''Contributed by: Aaron Griffin''
  
Line 12: Line 13:
 
#Aaron Griffin [[phrakture]]
 
#Aaron Griffin [[phrakture]]
  
BASEDIR=\"$HOME/aur\"
+
BASEDIR="$HOME/aur"
PKGURL=\"http://aur.archlinux.org/packages/\"
+
PKGURL="http://aur.archlinux.org/packages/"
PKGFILE=\"index.html\"
+
PKGFILE="index.html"
  
#get_dir http://www.xyz.com/a
+
# get_dir http://www.xyz.com/a
 
# This function will get all files
 
# This function will get all files
 
# listed in an apache formatted directory list
 
# listed in an apache formatted directory list
Line 23: Line 24:
 
   local thisdir=`basename $1`
 
   local thisdir=`basename $1`
  
   if [[ \"x$thisdir\" != \"x\" ]]; then
+
   if [[ "x$thisdir" != "x" ]]; then
 
       mkdir $thisdir
 
       mkdir $thisdir
 
       cd $thisdir
 
       cd $thisdir
 
       wget -q $1
 
       wget -q $1
 
       if [[ $? -eq 0 ]]; then
 
       if [[ $? -eq 0 ]]; then
         local files=`grep \"\[[  \]]\" $PKGFILE ||\
+
         local files=`grep "\[[  \]]" $PKGFILE ||\
                       sed 's@.*href=\\"\(.*\)\\".*@\1@g'`
+
                       sed 's@.*href=\"\(.*\)\".*@\1@g'`
 
         #skip parent dir, infinate recursion
 
         #skip parent dir, infinate recursion
         local dirs=`grep \"\[[DIR\]]\" $PKGFILE ||\
+
         local dirs=`grep "\[[DIR\]]" $PKGFILE ||\
                     grep -v \"Parent Directory\" ||\
+
                     grep -v "Parent Directory" ||\
                     sed 's@.*href=\\"\(.*\)\\".*@\1@g'`
+
                     sed 's@.*href=\"\(.*\)\".*@\1@g'`
 
         rm $PKGFILE
 
         rm $PKGFILE
  
 
         for f in $files; do
 
         for f in $files; do
             echo \"downloading $thisdir::$f\"
+
             echo "downloading $thisdir::$f"
 
             wget -q $1$f
 
             wget -q $1$f
 
         done
 
         done
Line 47: Line 48:
 
         cd ..
 
         cd ..
 
       else
 
       else
         echo \"error downloading directory list : $1\"
+
         echo "error downloading directory list : $1"
 
       fi
 
       fi
 
   else
 
   else
       echo \"usage: get_dir <apache url>\"
+
       echo "usage: get_dir <apache url>"
 
   fi
 
   fi
 
}
 
}

Revision as of 18:20, 23 April 2012

This article or section is out of date.

Reason: please use the first argument of the template to provide a brief explanation. (Discuss in User talk:Cactus/Aurscape#)

aurscrape Contributed by: Aaron Griffin

http://bbs.archlinux.org/viewtopic.php?t=12037

Seeing as cvsup isn't working with the AUR yet, I made a little script to download the entire set of AUR package directories. It takes a long time, because it uses HTTP and scrapes Apache's directory-listing format, but it works.

#!/bin/bash
#AUR Web Scraping to get all PKGBUILDs
#Aaron Griffin [[phrakture]]
#
# Requires bash: the script relies on `local`, `[[ ]]` and other constructs
# that a strictly POSIX /bin/sh (e.g. dash) does not provide.

# Local directory that receives the mirrored package tree.
BASEDIR="$HOME/aur"
# Root of the Apache directory listing to scrape (trailing slash required).
PKGURL="http://aur.archlinux.org/packages/"
# File name wget gives a saved directory listing.
PKGFILE="index.html"

# get_dir <apache url>
#
# Recursively download everything listed in an Apache-formatted directory
# index. A sub-directory named after the last path component of the URL is
# created in the current working directory; the plain files of the listing
# are fetched into it and every listed sub-directory is mirrored the same way.
#
# Globals:   PKGFILE (read) - file name under which wget saves the listing;
#            falls back to "index.html" when unset or empty.
# Arguments: $1 - URL of the directory listing (a trailing "/" is added if
#                 missing, because entry names are appended to it verbatim)
# Outputs:   progress messages on stdout, error messages on stderr
# Returns:   0 when everything was fetched; 1 on a usage error or when any
#            listing, file or sub-directory could not be downloaded
# Note:      needs bash ([[ ]], local, here-strings).
get_dir() {
   local url=${1:-}
   local index=${PKGFILE:-index.html}
   # Prints only the href target of a listing row; rows without one are dropped.
   local href_re='s@.*href="\([^"]*\)".*@\1@p'
   local thisdir="" files="" dirs="" f d
   local rc=0

   if [[ -n "$url" ]]; then
      thisdir=$(basename -- "$url")
   fi
   if [[ -z "$thisdir" ]]; then
      echo "usage: get_dir <apache url>"
      return 1
   fi

   case "$url" in
      */) ;;
      *) url="$url/" ;;
   esac

   # "./" keeps CDPATH from redirecting the cd to some other directory.
   mkdir -p "./$thisdir" || return 1
   cd "./$thisdir" || return 1

   # A stale listing would make wget save the new one as "$index.1".
   rm -f -- "$index"
   if ! wget -q "$url"; then
      echo "error downloading directory list : $url" >&2
      # Always step back out so the caller's working directory is unchanged.
      cd .. || return 1
      return 1
   fi

   # Apache tags rows with an alt text: "[   ]" for generic files and
   # "[DIR]" for directories. Fixed-string matching avoids regex pitfalls.
   files=$(grep -F '[   ]' "$index" | sed -n "$href_re")
   # skip parent dir, infinite recursion
   dirs=$(grep -F '[DIR]' "$index" \
      | grep -v 'Parent Directory' \
      | sed -n "$href_re")
   rm -f -- "$index"

   while IFS= read -r f; do
      [[ -n "$f" ]] || continue
      echo "downloading $thisdir::$f"
      if ! wget -q "$url$f"; then
         echo "error downloading file : $url$f" >&2
         rc=1
      fi
   done <<< "$files"

   while IFS= read -r d; do
      [[ -n "$d" ]] || continue
      # Only descend into relative children; absolute links, parent links
      # and sort-order query links would escape the tree or loop forever.
      case "$d" in
         /*|../*|*://*|\?*) continue ;;
      esac
      get_dir "$url$d" || rc=1
   done <<< "$dirs"

   cd .. || return 1
   return "$rc"
}

# Mirror into $BASEDIR. Create it if needed and abort when it cannot be
# entered, so the download never spills into whatever directory the script
# happened to be started from.
mkdir -p -- "${BASEDIR:?BASEDIR is not set}" || exit 1
cd -- "$BASEDIR" || exit 1

# Drop a listing left behind by an interrupted run.
if [[ -f "$PKGFILE" ]]; then
   rm -f -- "$PKGFILE"
fi

get_dir "$PKGURL"