linkchecker/tests/run_alexa_1m.sh
2012-09-21 16:04:46 +02:00

21 lines
672 B
Bash
Executable file

#!/bin/sh
# Run the top 1 million URLs as reported by alexa site monitoring.
# The logfile should be checked for
# - unusual errors and warnings,
# - missing recursion when robots.txt is allowed
# The error file should be checked for
# - internal errors
# - program errors (e.g. segmentation fault)
#
# Note that the result can depend on the current location.
# Some sites have geo-location-aware content.
set -u
# Export LANG so that child processes (linkchecker) really run in the C
# locale; a plain assignment is only inherited if LANG was already exported.
LANG=C
export LANG
logfile=alexa_1m.log
errfile=alexa_1m_err.log
urlfile=$HOME/src/alexatopsites/top-1m.txt

# Run ./linkchecker on every URL listed in a file.
# Globals:   logfile, errfile (read) - files recreated in the current directory
# Arguments: $1 - path of the URL list, one URL per line
# Outputs:   a "Checking <url>" marker per URL on stdout and in both files;
#            linkchecker stdout is appended to $logfile, stderr to $errfile
# Returns:   1 if the URL list is not readable (existing logs are kept),
#            otherwise the status of the last command run in the loop
check_urls() {
  if [ ! -r "$1" ]; then
    printf 'cannot read URL list: %s\n' "$1" >&2
    return 1
  fi
  rm -f -- "$logfile" "$errfile"
  # The list is read line by line on fd 3: entries are never word-split or
  # glob-expanded, and the stdin of linkchecker is left untouched.
  # read (default IFS) trims surrounding blanks; the "|| [ -n ... ]" part
  # keeps a final line that lacks a trailing newline.
  while read -r url <&3 || [ -n "$url" ]; do
    # Skip blank lines.
    [ -n "$url" ] || continue
    printf 'Checking %s\n' "$url" | tee -a "$logfile" "$errfile"
    ./linkchecker -r1 --no-status "$url" >>"$logfile" 2>>"$errfile"
  done 3<"$1"
}

check_urls "$urlfile"