global - randomcrap - random crap programs of varying quality
HTML git clone git://git.codemadness.org/randomcrap
DIR Log
DIR Files
DIR Refs
DIR README
DIR LICENSE
---
global (3633B)
---
#!/bin/sh
# Upper bound on the number of parallel jobs.
# NOTE: kept low on purpose: with more jobs the server load was high and
# processing feeds (per feed) was slower.
maxjobs=8

# Timestamp taken at startup; log() and log_error() subtract it from the
# current timestamp to print a duration.
# NOTE: uses "ts" for the timestamp.
starttime=$(printf '\n' | ts -m '%.s')

# Custom connectors, sourced into this shell (presumably they define the
# converter functions that parse() calls -- verify against the files).
#. ~/.sfeed/connectors/vimeo
. ~/.sfeed/connectors/github
. ~/.sfeed/connectors/youtube-videos-with-duration
. ~/.sfeed/connectors/repology_history_atom.sh
14
# log(name, s)
# Print one status line to stdout: the current time (HH:MM:SS), the name
# (padded and truncated to 50 characters), the message and the duration
# since $starttime.
# Sets the global variables endtime and duration as a side effect.
log() {
	# NOTE: timings depend on "ts" and bc.
	endtime=$(printf '\n' | ts -m '%.s')
	duration=$(printf '%s - %s\n' "$endtime" "$starttime" | bc)

	printf '[%s] %-50.50s %s (%s)\n' \
		"$(date +'%H:%M:%S')" "$1" "$2" "$duration"
}
23
# log_error(name, s)
# Print one status line to stderr in the same layout as log(): the current
# time (HH:MM:SS), the name (padded and truncated to 50 characters), the
# message and the duration since $starttime. Afterwards the "ok" marker
# file in $sfeedtmpdir is removed to flag the failure.
# Sets the global variables endtime and duration as a side effect.
log_error() {
	# NOTE: uses "ts" and bc for timings.
	endtime=$(echo | ts -m '%.s')
	duration=$(echo "$endtime - $starttime" | bc)

	printf '[%s] %-50.50s %s (%s)\n' "$(date +'%H:%M:%S')" "$1" "$2" "$duration" >&2
	# set error exit status indicator for parallel jobs.
	# ${sfeedtmpdir:?} aborts instead of expanding to "/ok" when the
	# variable is unset or empty.
	rm -f "${sfeedtmpdir:?}/ok"
}
34
# parse(name, feedurl, basesiteurl)
# Pick the conversion pipeline for a feed based on its URL ($2). Where
# sfeed is used it receives the base site URL ($3) as its argument.
parse() {
	case "$2" in
	*://vimeo.com/api/v2/user/*/videos.json)
		# create a feed from the public Vimeo API (v2).
		# NOTE(review): vimeo2atom is not defined in the visible part
		# of this file -- confirm its connector is loaded.
		vimeo2atom | sfeed "$3"
		;;
	*://api.github.com/*)
		github_json_to_atom | sfeed "$3"
		;;
	*://www.youtube.com/*)
		# the connector gets the feed URL; sfeed is not called here.
		youtube_with_duration "$2"
		;;
	*://repology.org/*)
		repology_history "$2" | sfeed "$3"
		;;
	*)
		# everything else goes to sfeed directly.
		sfeed "$3"
		;;
	esac
}
51
# merge raw files: unique sort by id, title, link.
# merge(name, oldfile, newfile)
# Writes the merged lines to stdout. The inputs are TAB-separated; lines
# whose fields 6 (id), 2 (title) and 3 (link) are all equal are collapsed
# into one. newfile ($3) is passed to sort before oldfile ($2).
merge() {
	# The separator must be a TAB: with a space separator the key fields
	# are cut at spaces inside titles and duplicates are not detected.
	# Errors from sort are silenced on purpose.
	LC_ALL=C sort -t "$(printf '\t')" -u -k6,6 -k2,2 -k3,3 "$3" "$2" 2>/dev/null
}
57
# order by timestamp (descending).
# order(name)
# Reads TAB-separated lines from stdin and writes them to stdout, sorted
# numerically on field 1 in reverse order.
order() {
	# The separator must be a TAB so that field 1 is exactly the
	# timestamp; with a space separator an empty timestamp followed by a
	# title starting with digits is sorted by that title number.
	# Errors from sort are silenced on purpose.
	LC_ALL=C sort -t "$(printf '\t')" -k1rn,1 2>/dev/null
}
63
# fetch a feed via HTTP/HTTPS etc.
# fetch(name, url, feedfile)
# Downloads the URL ($2) with curl and writes the response body to stdout.
# Returns 1 when curl fails, 0 when curl succeeded but wrote no file
# (treated as Not Modified), otherwise the status of writing the body.
# Sets the global variables useragent, basename, etag, lastmod, output and
# fetchstatus as a side effect.
fetch() {
	# User-Agent per site, empty for every URL not listed here.
	useragent=""
	case "$2" in
	*://api.github.com/*)
		useragent="github";;
	*://www.anandtech.com/*)
		# requires User-Agent
		useragent="bla";;
	*://old.reddit.com/* | *://en.wikipedia.org/* | *://*.fsf.org/* | *://repology.org/*)
		# requires User-Agent
		useragent="Firefox";;
	# disabled:
	# *://tilde.news/* | *://lobste.rs/*)
	#	# requires User-Agent
	#	useragent="bla";;
	esac

	# e-tag cache and optimizations (If-Modified-Since): the cache files
	# are named after the file name part of the feed file ($3).
	basename="$(basename "$3")"
	etag="$HOME/.sfeed/etags/${basename}"
	lastmod="$HOME/.sfeed/lastmod/${basename}"
	output="${sfeedtmpdir}/feeds/${basename}.xml"

	if ! curl \
		-L --max-redirs 0 -H "User-Agent:${useragent}" -f -s -m 15 \
		--compressed \
		--etag-save "${etag}" --etag-compare "${etag}" \
		-R -o "${output}" \
		-z "${lastmod}" \
		"$2" 2>/dev/null; then
		return 1
	fi

	# successful, but no file written: assume it is OK and Not Modified.
	if [ ! -e "${output}" ]; then
		return 0
	fi

	# use server timestamp from curl -R to set Last-Modified.
	touch -r "${output}" "${lastmod}" 2>/dev/null
	# use write output status, other errors are ignored here.
	cat "${output}" 2>/dev/null
	fetchstatus="$?"
	rm -f "${output}" 2>/dev/null
	return "${fetchstatus}"

	# Alternatives kept for debugging, not executed.
	#
	# DEBUG: same request, verbose and with stderr visible:
	# curl -v \
	#	-L --max-redirs 0 -H "User-Agent:${useragent}" -f -s -m 15 \
	#	--compressed \
	#	--etag-save "${etag}" --etag-compare "${etag}" \
	#	-R -o "${output}" \
	#	-z "${lastmod}" \
	#	"$2" || return 1
	#
	# DEBUG: no e-tag or optimizations, logging the bandwidth used:
	# t="$(mktemp)"
	# curl \
	#	-L --max-redirs 0 -H "User-Agent:${useragent}" -f -s -m 15 \
	#	"$2" \
	#	2>/dev/null > "$t"
	# count="$(wc -c < "$t")"
	# count=$((count+0))
	# echo "$(date) $name $count" >> ~/.sfeed/bandwidth_no_etag
	# cat "$t"
	# rm -f "$t"
}