DZone Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world.

Snippets has posted 5883 posts at DZone. View Full User Profile

Auto Download PDF From www.estado.com.br

03.28.2008
| 5383 views |
  • submit to reddit
        # This script downloads all PDFs from http://jpdf.estado.com.br
# Note: you must subscribe to this newspaper in order to download the PDFs.
# Requires wget (invoked as ./wget/wget) and gawk.

# Send wget's HTTP traffic through this proxy.
export http_proxy="http://10.1.1.1:8000"

# Value sent to the site as the "User" cookie on every request.
cookie='sMkjwKA67H8FDcsZX5'

# Read the clock once and split the result, so day, month and year always
# belong to the same date even if the script runs across midnight.
today=$(date +%Y-%m-%d)
yyyy=${today%%-*}
dd=${today##*-}
mm=${today#*-}
mm=${mm%-*}

# Menu page for today's edition; it carries one <option VALUE="/estadopdf/...">
# entry per PDF path that the extraction below picks up.
index="http://jpdf.estado.com.br/menupdfi.php?E=SP&D=$dd/$mm/$yyyy&A=/estadopdf/sp/paginas/$yyyy/$mm/$dd/A01.pdf"

# Drop any index left over from an earlier run (-f: stay quiet if absent),
# otherwise -nc would refuse to fetch a fresh copy.
rm -f index.txt
./wget/wget -nc -k -S -U Mozilla --proxy --header "Cookie: User=$cookie " -O index.txt "$index"

# wget -O creates the output file even when the download fails, so test for
# a non-empty file (-s) rather than mere existence (-f).
if [ ! -s index.txt ]; then
	echo "could not download index page: $index" >&2
	exit 1
fi

# Collect the second double-quote-delimited field of every line containing
# option VALUE="/estadopdf  -- i.e. the PDF paths, whitespace-separated.
l=$(gawk 'BEGIN {FS="\""} /option VALUE="\/estadopdf/ { print $2 }' index.txt)

# Print the section code of a first-page PDF path: the path with the
# trailing "01.pdf" and every leading directory removed.
#   $1 - path such as /estadopdf/sp/paginas/2008/03/28/Cl01.pdf  ->  Cl
section_of() {
	local stem=${1%01.pdf}
	printf '%s\n' "${stem##*/}"
}

# Return 0 when section code $1 must not be downloaded:
# "Cl" (the classifieds) and "Q" (the guide); 1 for anything else.
skip_section() {
	case "$1" in
		Cl|Q) return 0 ;;
		*) return 1 ;;
	esac
}

# Walk every first-page path found in the index ($l is whitespace-separated,
# so it is deliberately left unquoted) and fetch that section page by page.
for x in $l; do

	# Skip the classifieds and the guide. The comparison is a string match on
	# the section code; an integer test (-eq) on the full path never matches.
	if skip_section "$(section_of "$x")"; then continue; fi

	# URL prefix of the section: everything up to the two-digit page number.
	y=http://jpdf.estado.com.br${x%01.pdf}
	i=1
	flag=0

	# Pages are numbered 01, 02, ...; try at most 39 per section.
	while [ "$i" -lt 40 ]; do
		filename=$(printf "%s%02d.pdf" "$y" "$i")

		echo "=================================================================="
		echo "$filename"
		echo "=================================================================="

		# Any non-zero status is a failed page: current wget reports HTTP
		# errors with codes other than 1 (e.g. 8 for a server error response).
		if ! ./wget/wget -P estado -nc -k -S -U Mozilla --proxy --header "Cookie: User=$cookie " "$filename"; then
			flag=$((flag + 1))
			# A second failed page in this section means it is over.
			if [ "$flag" -gt 1 ]; then
				flag=0
				echo "Proximo caderno..."; break;
			fi
		fi

		# Pause between requests.
		sleep 1
		i=$((i + 1))
	done
done