SHELL := /bin/bash
# Pick the emacs flavour: GUI when an X display is available, terminal
# (-nw) otherwise.  The condition must be $(DISPLAY) — a make variable
# inherited from the environment.  The previous $$DISPLAY expanded to the
# literal text "$DISPLAY", which is always non-empty, so the GUI branch
# always won.  The value is also left unquoted so that "-nw" reaches the
# shell as a separate word instead of being part of the program name.
emacs := $(if $(DISPLAY),/usr/bin/emacs,/usr/bin/emacs -nw)
# Reusable pipeline fragment: count duplicate lines, most frequent first.
SortUniqCount := sort | uniq -c | sort -nr
# Search URL of the news site and the regex matching article links in it.
URL := 'http://aktualne.centrum.cz/hledani.phtml?section=1496&type=clanky&q='
Article_Regex := 'http://aktualne\.centrum\.cz/czechnews/clanek\.phtml\?id=[0-9]+'
# Destination directory for the uploaded HTML slides.
COURSE_WEB := /home/zabokrtsky/public_html/courses/npfl092/html/slides/w3c_slidy
# How many articles to download per set.
SIZE := 5

default : dirs download freq fcompare rank rcompare

# Create the working directories for the two article sets.
# -p makes the rule idempotent: re-running "make dirs" (or the default
# target) no longer aborts when the directories already exist.
dirs :
	mkdir -p new old

# Fetch both article sets via download.sh.  "&&" after each cd (instead
# of the former ";") aborts the recipe if the directory is missing, so
# the downloader can never run — and scatter files — in the wrong place.
download :
	cd new && \
	../download.sh 1 $(SIZE) 12mesicu $(URL) $(Article_Regex)

# Get the offset of 1 year old articles, use it to get them: keep
# following the largest "offset=N" link on each result page until the
# offset stops growing, i.e. we have reached the last (oldest) page.
	cd old && \
	offset=1; \
	old_offset=0; \
	while ((offset > old_offset)) ; do \
	    old_offset=$$offset; \
	    offset=$$(wget -qO- $(URL)'&offset='$$offset'&time=12mesicu' \
	    | grep -Eo 'offset=[0-9]+' | sort -t= -nk2 | tail -n1 ) ; \
	done ; \
	../download.sh $$old_offset $(SIZE) relevance $(URL) $(Article_Regex)

# Build a word-frequency table for both article sets into freq.all.
# Per directory: convert the Latin-2 pages to UTF-8, join lines (chomp),
# strip HTML tags (looped, so tags spanning the joined text are caught),
# drop tabs and CRs, put one word per line, prefix each line with the
# set name ("old"/"new") and a tab, then count with $(SortUniqCount).
# NOTE(review): output lines look like "<count> <dir>\t<word>" — the
# format compare.perl presumably expects; verify against compare.perl.
freq :
	for dir in old new ; do \
	    iconv -f l2 -t utf-8 $$dir/clanek* \
	    | perl -pechomp \
	    | perl -pe 's%<[^<>]+>%%g while /<.*>/s;s%\t%%g;s/\r//g;s/ +/\n/g;s/^/'$$dir'\t/mg' \
	    | $(SortUniqCount) ; \
	done > freq.all

# Compare word frequencies between the two sets into freq.txt.
# pipefail makes the rule fail when compare.perl itself fails; without
# it the pipeline's exit status is that of the final "sort", so a broken
# comparison would silently leave an empty-but-fresh freq.txt behind.
fcompare :
	set -o pipefail && ./compare.perl freq freq.all | sort -n > freq.txt

# Compute the word-rank table from freq.all.
# Write to a temporary file and rename atomically, so a failing
# rank.perl cannot leave a truncated rank.all for rcompare to consume.
rank :
	./rank.perl freq.all > rank.all.tmp && mv -f rank.all.tmp rank.all

# Compare word ranks between the two sets into rank.txt.
# pipefail makes the rule fail when compare.perl itself fails; without
# it only the final "sort" determines the pipeline's exit status.
rcompare :
	set -o pipefail && ./compare.perl rank rank.all | sort -n > rank.txt

html : compare.html rank.html Makefile.html download.sh.html

# Pattern rule: htmlize a Perl script with Emacs' htmlize library.
# $< is the .perl source, $@ the generated .html file.
%.html : %.perl
	$(emacs) --eval '(progn (load-library "htmlize")(htmlize-file "$<" "$@") (kill-emacs))'

# Match-anything fallback: htmlize any other file (e.g. Makefile,
# download.sh) whose name does not end in .perl.  The more specific
# %.perl rule above takes precedence when both match.
%.html : %
	$(emacs) --eval '(progn (load-library "htmlize")(htmlize-file "$<" "$@") (kill-emacs))'

# Copy all generated HTML files to the course web directory on the
# "ufal" host (assumes a working ssh/scp configuration for that alias).
upload : html
	scp *.html ufal:$(COURSE_WEB)/

####################

# Remove the regenerable intermediate tables.
# Both targets are plain files, so "rm -f" suffices; the former "-r"
# was unnecessary (and needlessly risky) here.
clean :
	rm -f freq.all rank.all

# Full cleanup: also drop the downloaded article directories, the
# comparison reports and every generated HTML page.
superclean: clean
	$(RM) -r old new freq.txt rank.txt *.html

.PHONY : clean dirs download freq fcompare html rcompare superclean upload