# Recipes below use bash-only syntax (the `(( ... ))` arithmetic test in the
# `download` target), so run them with bash rather than the default /bin/sh.
SHELL := /bin/bash

# Emacs command line used by the %.html rules.  When DISPLAY is set in the
# environment the plain binary is used; otherwise `-nw` is added.
# make imports environment variables, so $(DISPLAY) is the right test here:
# `$$DISPLAY` would expand to the literal text "$DISPLAY", which is never
# empty.  The value is left unquoted so that `-nw` stays a separate argument.
emacs := $(if $(DISPLAY),/usr/bin/emacs,/usr/bin/emacs -nw)

# Pipeline tail: count identical input lines and list the most frequent first.
SortUniqCount := sort | uniq -c | sort -nr

# Search URL queried by `download`.  The single quotes are part of the value
# so the shell receives `?` and `&` literally.
URL := 'http://aktualne.centrum.cz/hledani.phtml?section=1496&type=clanky&q='

# Extended regex describing an article URL; passed as the last argument to
# download.sh.
# NOTE(review): presumably used there to pick article links out of the search
# results - confirm against download.sh.
Article_Regex := 'http://aktualne\.centrum\.cz/czechnews/clanek\.phtml\?id=[0-9]+'

# Remote directory that `upload` copies the generated HTML files into.
COURSE_WEB := /home/zabokrtsky/public_html/courses/npfl092/html/slides/w3c_slidy

# Second argument handed to download.sh.
# NOTE(review): looks like the amount of data to fetch - confirm against
# download.sh.
SIZE := 5
# Default goal: run the whole pipeline.  The steps are listed in the order in
# which they have to happen; no dependencies are declared between them, so the
# ordering comes only from this list being processed left to right.
default : \
    dirs \
    download \
    freq \
    fcompare \
    rank \
    rcompare
# Create the two working directories that `download` fills and `freq` reads.
# `-p` makes the step repeatable: plain `mkdir` fails when the directories
# already exist, which would abort every run after the first one.
dirs :
	mkdir -p new old
# Fetch pages into the two working directories (both must already exist; see
# the `dirs` target).  Each recipe line runs in its own shell, so every `cd`
# is chained to the commands that depend on it.
#
#   new/ : download.sh is started at offset 1 with the `12mesicu` argument.
#   old/ : first walk the search result pages: request the search URL with the
#          current offset, take the largest `offset=N` number found in the
#          returned page, and repeat while that number keeps growing.  The
#          last offset that was actually requested is then handed to
#          download.sh together with the `relevance` argument.
#
# The number is cut out of the `offset=N` match explicitly, so the shell
# variable always holds a plain integer.  An empty result (nothing matched, or
# wget failed) counts as 0 in the arithmetic test and ends the loop.
# A failed `cd` stops the recipe instead of running download.sh in the wrong
# directory.
# NOTE(review): the meaning of download.sh's positional arguments is not
# visible from this file - confirm against download.sh.
download :
	cd new && \
	    ../download.sh 1 $(SIZE) 12mesicu $(URL) $(Article_Regex)
	cd old || exit 1 ; \
	offset=1 ; \
	old_offset=0 ; \
	while (( offset > old_offset )) ; do \
	    old_offset=$$offset ; \
	    offset=$$(wget -qO- $(URL)'&offset='$$offset'&time=12mesicu' \
	        | grep -Eo 'offset=[0-9]+' | cut -d= -f2 | sort -n | tail -n1) ; \
	done ; \
	../download.sh $$old_offset $(SIZE) relevance $(URL) $(Article_Regex)
# Build freq.all: word counts for the pages in old/ and new/.
#
# For each directory, in this order (old, then new):
#   1. iconv converts every clanek* file from the `l2` charset to UTF-8.
#   2. `perl -pechomp` drops the newline of every line, so the whole input
#      becomes one long line and markup spanning several lines can be matched.
#   3. The second perl removes <...> markup, tabs and carriage returns, turns
#      each run of spaces into a newline (one token per line) and prefixes
#      every line with "<dir><TAB>".
#   4. $(SortUniqCount) counts identical lines, most frequent first.
#
# Markup is removed with `1 while s%<[^<>]*>%%g`, which repeats until nothing
# is left to remove and therefore always terminates.  The earlier form
# (`s%<[^<>]+>%%g while /<.*>/s`) looped forever as soon as an empty `<>`
# pair was present or was produced by an inner removal (e.g. `<< b >>`).
# For input on which the earlier form terminated, the result is unchanged.
#
# NOTE(review): step 2 joins lines without inserting a space, so the last
# word of one line is fused with the first word of the next - confirm that
# this is intended.
freq :
	for dir in old new ; do \
	    iconv -f l2 -t utf-8 $$dir/clanek* \
	    | perl -pechomp \
	    | perl -pe '1 while s%<[^<>]*>%%g;s%\t%%g;s/\r//g;s/ +/\n/g;s/^/'$$dir'\t/mg' \
	    | $(SortUniqCount) ; \
	done > freq.all
# Run compare.perl in `freq` mode on freq.all and store its output, sorted
# numerically, in freq.txt.
# NOTE(review): what compare.perl computes is not visible from this file.
fcompare :
	./compare.perl freq freq.all \
	    | sort -n \
	    > freq.txt
# Feed freq.all through rank.perl and keep the result in rank.all.
# NOTE(review): what rank.perl computes is not visible from this file.
rank :
	./rank.perl freq.all \
	    > rank.all
# Run compare.perl in `rank` mode on rank.all and store its output, sorted
# numerically, in rank.txt.
# NOTE(review): what compare.perl computes is not visible from this file.
rcompare :
	./compare.perl rank rank.all \
	    | sort -n \
	    > rank.txt
# Render the scripts and this Makefile as HTML pages; each file is produced by
# one of the %.html pattern rules.
html : \
    compare.html \
    rank.html \
    Makefile.html \
    download.sh.html
# Canned recipe shared by both %.html rules: start $(emacs), load the
# `htmlize` library, convert the first prerequisite ($<) into the target ($@)
# and quit.  `define` creates a recursively expanded variable, so $< and $@
# are filled in when a rule uses it.
define htmlize_file
$(emacs) --eval '(progn (load-library "htmlize")(htmlize-file "$<" "$@") (kill-emacs))'
endef

# foo.html from foo.perl (e.g. compare.html, rank.html).
%.html : %.perl
	$(htmlize_file)

# foo.html from a file named exactly foo (e.g. Makefile.html, download.sh.html).
%.html : %
	$(htmlize_file)
# Build the HTML pages, then copy every *.html file in this directory to the
# $(COURSE_WEB) directory on the host `ufal`.
upload : html
	scp \
	    *.html \
	    ufal:$(COURSE_WEB)/
# Delete the intermediate data files written by `freq` and `rank`.
clean :
	rm -rf \
	    freq.all \
	    rank.all
# Everything `clean` removes, plus the download directories, the comparison
# results and every *.html file in this directory (the glob is not limited to
# the pages generated by `html`).
superclean: clean
	rm -rf \
	    old new \
	    freq.txt rank.txt \
	    *.html
# None of these targets produces a file of its own name, so all of them are
# declared phony.  `default` and `rank` are included as well: without them, a
# file with either name in this directory would make the target look up to
# date and its recipe would be skipped.
.PHONY : clean default dirs download fcompare freq html rank rcompare superclean upload