import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.Date;
+import java.util.GregorianCalendar;
+import java.util.Calendar;
import java.util.List;
import java.util.Locale;
import java.util.Set;
private static final int MAX_PROGRAMMES_PER_DAY = 9999;
private static final int MAX_DAYS_AHEAD_SUPPORTED_BY_ZIGGOGIDS = 3;
+ private static final int MAX_CHANNELS_PER_REQUEST = 25;
public static String NAME = "ziggogids.nl";
return NAME;
}
- public static URL programmeUrl(List<Channel> channels, int day)
+ /**
+  * Builds the URL of the programme listing page for a given day and hour.
+  *
+  * The timestamp is derived from the current time in the JVM default time
+  * zone: {@code day} days are added, the hour is set to {@code hour} and the
+  * minutes are set to 0.
+  *
+  * @param day  number of days to add to today (0 = today)
+  * @param hour hour of day (0-23) to request
+  * @return {@code programme_base_url + "/" + yyyyMMdd'T'HHmm}
+  * @throws Exception kept in the signature for existing callers; nothing in
+  *                   this body throws a checked exception
+  */
+ public static String programmeUrl(int day, int hour)
throws Exception {
StringBuilder s = new StringBuilder(programme_base_url);
- if (channels.size() < 1) {
- throw new Exception("should have at least one channel");
- }
- s.append("?channels=");
- boolean first = true;
- for (Channel i : channels) {
- if (first) {
- s.append(i.id);
- first = false;
- } else {
- s.append("," + i.id);
- }
- }
- s.append("&day=");
- s.append(day);
-
- return new URL(s.toString());
+ s.append("/");
+ GregorianCalendar cal = new GregorianCalendar();
+ cal.add(Calendar.DAY_OF_MONTH, day);
+ cal.set(Calendar.HOUR_OF_DAY, hour);
+ cal.set(Calendar.MINUTE, 0);
+ // Locale.ROOT: the timestamp is part of a URL, so it must always use the
+ // Gregorian calendar and ASCII digits, whatever the JVM default locale is.
+ String date = new SimpleDateFormat("yyyyMMdd'T'HHmm", Locale.ROOT).format(cal.getTime());
+ s.append(date);
+
+ return s.toString();
}
- public static URL DetailUrl(String id) throws Exception {
+ /**
+  * Builds the URL of the detail page for a single programme.
+  *
+  * @param id programme id; appended verbatim, no URL-encoding is applied here
+  * @return {@code detail_base_url} followed by {@code "/typefav=false?progid="}
+  *         and the id
+  */
+ public static String detailUrl(String id) {
StringBuilder s = new StringBuilder(detail_base_url);
- s.append("?id=");
+ s.append("/typefav=false?progid=");
s.append(id);
- return new URL(s.toString());
+ return s.toString();
}
- private void setActiveChannel(CloseableHttpClient client, String channel) throws IOException {
- setActiveChannels(client, Collections.singletonList(channel));
- }
-
private Document fetchJsoup(CloseableHttpClient client, String url) throws IOException {
Document doc = null;
return doc;
}
+ /**
+  * Convenience overload for a single channel: wraps {@code channel} in a
+  * one-element list and delegates to {@code setActiveChannels}.
+  *
+  * @param client  HTTP client passed through to {@code setActiveChannels}
+  * @param channel the one channel id to pass on
+  * @throws IOException propagated from {@code setActiveChannels}
+  */
+ private void setActiveChannel(CloseableHttpClient client, String channel) throws IOException {
+ setActiveChannels(client, Collections.singletonList(channel));
+ }
+
private void setActiveChannels(CloseableHttpClient client, List<String> channels) throws IOException {
Document doc;
try {
{
setActiveChannel(client, channel);
- String url = programme_base_url+"/2013-12-24T1615";
+ String url = programme_base_url+"/";
Document doc = fetchJsoup(client, url);
// logger.debug("ziggogids programme: " + doc.outerHtml());
String name = e.select("label").first().text();
logger.debug(" "+index+": \""+name+"\"");
Channel c = Channel.getChannel(getId(), index, name);
+ /* Too slow for now
try {
String icon = fetchIconUrl(httpclient, index);
logger.debug(" "+icon);
} catch (IOException e2) {
logger.error("IO Exception trying to get channel log for channel "+index, e2);
}
+ */
result.add(c);
}
return result;
return result; // empty list
}
- URL url = programmeUrl(channels, day);
-
- JSONObject jsonObject = fetchJSON(url);
+ CloseableHttpClient httpclient = HttpClients.createDefault();
for (Channel c : channels) {
- JSON ps = (JSON) jsonObject.get(c.id);
- if (ps.isArray()) {
- JSONArray programs = (JSONArray) ps;
- for (int i = 0; i < programs.size()
- && i < MAX_PROGRAMMES_PER_DAY; i++) {
- JSONObject programme = programs.getJSONObject(i);
- Programme p = programmeFromJSON(programme,
- config.fetchDetails);
- p.channel = c.getXmltvChannelId();
- result.add(p);
- }
- } else {
- JSONObject programs = (JSONObject) ps;
- int count = 0;
- for (Object o : programs.keySet()) {
- if (count > MAX_PROGRAMMES_PER_DAY)
- break;
- JSONObject programme = programs.getJSONObject(o.toString());
- Programme p = programmeFromJSON(programme,
- config.fetchDetails);
- p.channel = c.getXmltvChannelId();
- result.add(p);
- count++;
- }
- }
- }
+ setActiveChannel(httpclient, c.id);
- return result;
- }
+ String url = programmeUrl(day, 20); // hour
+ logger.debug("url: "+url);
- /*
- * {"4": [{"db_id":"12436404", "titel":"RTL Boulevard", "genre":"Amusement",
- * "soort":"Amusementsprogramma", "kijkwijzer":"", "artikel_id":null,
- * "datum_start":"2012-03-30 23:45:00", "datum_end":"2012-03-31 00:40:00"},
- * {"db_id":"12436397","titel":"Teleshop 4","genre":"Overige","soort":
- * "Homeshopping"
- * ,"kijkwijzer":"","artikel_id":null,"datum_start":"2012-03-31 00:40:00"
- * ,"datum_end":"2012-03-31 00:41:00"},
- * {"db_id":"12436398","titel":"Cupido TV"
- * ,"genre":"Overige","soort":"","kijkwijzer"
- * :"","artikel_id":null,"datum_start"
- * :"2012-03-31 00:41:00","datum_end":"2012-03-31 04:30:00"},
- * {"db_id":"12436399"
- * ,"titel":"Morning chat","genre":"Overige","soort":"","kijkwijzer"
- * :"","artikel_id"
- * :null,"datum_start":"2012-03-31 04:30:00","datum_end":"2012-03-31 06:00:00"
- * }, ....... ]}
- */
- private Programme programmeFromJSON(JSONObject programme,
- boolean fetchDetails) throws Exception {
- String id = programme.getString("db_id");
- Programme result = cache.get(getId(), id);
- boolean cached = (result != null);
- if (result == null) {
- stats.cacheMisses++;
- result = new Programme();
- // Do this here, because we can only add to these fields. Pity if
- // they're updated
- result.addTitle(programme.getString("titel"));
- String genre = programme.getString("genre");
- if (genre != null && !genre.isEmpty())
- result.addCategory(config.translateCategory(genre));
- String kijkwijzer = programme.getString("kijkwijzer");
- if (kijkwijzer != null && !kijkwijzer.isEmpty()) {
- List<String> list = parseKijkwijzer(kijkwijzer);
- if (config.joinKijkwijzerRatings) {
- // mythtv doesn't understand multiple <rating> tags
- result.addRating("kijkwijzer", StringUtils.join(list, ","));
- } else {
- for (String rating : list) {
- result.addRating("kijkwijzer", rating);
- }
- }
- // TODO add icon from HTML detail page
- }
- } else {
- // System.out.println("From cache: " +
- // programme.getString("titel"));
- stats.cacheHits++;
- }
- logger.trace(" titel:" + programme.getString("titel"));
- logger.trace("datum_start:" + programme.getString("datum_start"));
- logger.trace(" datum_end:" + programme.getString("datum_end"));
- logger.trace(" genre:" + programme.getString("genre"));
- SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss",
- new Locale("nl"));
- result.startTime = df.parse(programme.getString("datum_start"));
- result.endTime = df.parse(programme.getString("datum_end"));
- // TODO other fields
-
- if (fetchDetails && !cached) {
- // TODO also read details if those have not been cached
- fillDetails(id, result);
- }
- if (!cached) {
- // FIXME where to do this?
- cache.put(getId(), id, result);
- }
- logger.debug(result);
+ Document doc;
+ try {
+ doc = fetchJsoup(httpclient, url);
+ } catch (IOException e) {
+ logger.error("IO Exception trying to get ziggo channel list from "+url, e);
+ return result;
+ }
+
+// logger.debug("ziggogids programme: " + doc.outerHtml());
+
+ Elements rows = doc.select(".gids-item-row");
+ for(Element row: rows) {
+ logger.debug("*** row ***");
+ for(Element item: row.select(".gids-row-item")) {
+ Programme p = programmeFromElement(httpclient, item);
+ p.channel = c.getXmltvChannelId();
+ result.add(p);
+ logger.debug(p.toString());
+ }
+ }
+ }
return result;
}
- private void fillDetails(String id, Programme result) throws Exception {
- try {
- //fillJSONDetails(id, result);
- } catch (Exception e) {
- logger.warn("Error fetching details for programme "
- + result.toString());
- }
- try {
- //fillScraperDetails(id, result);
- } catch (Exception e) {
- logger.warn("Error fetching details for programme "
- + result.toString());
- }
+ /**
+  * Turns one programme element of the listing page into a {@code Programme}.
+  *
+  * The element's {@code pr-start} attribute (unix time in seconds) and
+  * {@code popup-id} attribute together form the cache key. On a cache miss a
+  * new programme is created with the text of {@code .gids-row-item-title} as
+  * its title. Start and end time are always (re)computed from the element.
+  * Details are fetched when {@code config.fetchDetails} is set and the
+  * programme is either new or has no description yet.
+  *
+  * @param httpclient client used for the optional detail request
+  * @param item       element carrying the {@code popup-id}, {@code pr-start}
+  *                   and {@code pr-duration} attributes
+  * @return the cached or newly created programme
+  * @throws NumberFormatException if {@code pr-start} or {@code pr-duration}
+  *                               is not a valid number
+  */
+ private Programme programmeFromElement(CloseableHttpClient httpclient, Element item) {
+     final String progid = item.attr("popup-id");
+     final long startSeconds = Long.parseLong(item.attr("pr-start")); // unix time
+     final String cacheKey = Long.toString(startSeconds) + "_" + progid;
+
+     Programme p = cache.get(getId(), cacheKey);
+     final boolean cached = (p != null);
+     if (cached) {
+         stats.cacheHits++;
+     } else {
+         stats.cacheMisses++;
+         p = new Programme();
+         // The title is only set on creation; a cached programme keeps its own.
+         p.addTitle(item.select(".gids-row-item-title").text());
+     }
+
+     p.startTime = new Date(1000L * startSeconds);
+     final long durationMinutes = Integer.parseInt(item.attr("pr-duration")); // minutes
+     p.endTime = new Date(1000L * (startSeconds + 60 * durationMinutes));
+
+     final boolean needDetails = config.fetchDetails && (!cached || !p.hasDescription());
+     if (needDetails) {
+         fillDetails(httpclient, p, progid);
+     }
+     if (!cached) {
+         // FIXME where to do this?
+         cache.put(getId(), cacheKey, p);
+     }
+     return p;
+ }
- if ((result.secondaryTitles == null || result.secondaryTitles.isEmpty())
- && (!result.hasCategory("movies") && !result
- .hasCategory("film"))) {
- for (Programme.Title t : result.titles) {
- String[] parts = t.title.split("\\s*:\\s*", 2);
- if (parts.length >= 2 && parts[0].length() >= 5) {
- logger.debug("Splitting title from \"" + t.title
- + "\" to: \"" + parts[0].trim()
- + "\"; sub-title: \"" + parts[1].trim() + "\"");
- t.title = parts[0].trim();
- result.addSecondaryTitle(parts[1].trim());
- }
- }
- }
- }
+ /**
+  * Fetches the detail page of a programme and copies description and genre
+  * into {@code p}.
+  *
+  * An I/O failure is logged and the programme is left unchanged. The genre is
+  * taken from the text of {@code .progpop_time}: everything after the first
+  * comma, trimmed. When that text has no {@code "prefix,"} part, or nothing
+  * follows the comma, no category is added.
+  *
+  * @param httpclient client used to fetch the detail page
+  * @param p          programme to enrich
+  * @param progid     programme id used to build the detail URL
+  */
+ private void fillDetails(CloseableHttpClient httpclient, Programme p, String progid) {
+     Document doc;
+     String url = detailUrl(progid);
+     try {
+         doc = fetchJsoup(httpclient, url);
+     } catch (IOException e) {
+         logger.error("IO Exception trying to get ziggo detail info from "+url, e);
+         return;
+     }
+     if (doc == null) {
+         // NOTE(review): fetchJsoup initialises its result to null; guard in
+         // case it can return without a document - confirm.
+         return;
+     }
+     //logger.debug("ziggogids detail: " + doc.outerHtml());
+     Element desc = doc.select(".progpop_descr").first();
+     if (desc != null) {
+         p.addDescription(desc.text());
+     }
+
+     // TODO: parse the kijkwijzer rating from ".progpop_kijkwijzer"
+
+     Element time = doc.select(".progpop_time").first();
+     if (time != null) {
+         String timeText = time.text();
+         logger.debug("progpop_time: "+timeText);
+         // Same cut as the regex "^[^,]+," but only when it would match:
+         // without a leading "prefix," there is no genre part, and the whole
+         // time string must not end up as a category.
+         int comma = timeText.indexOf(',');
+         if (comma > 0) {
+             String genre = timeText.substring(comma + 1).trim();
+             logger.debug("Genre: " + genre);
+             if (!genre.isEmpty()) {
+                 p.addCategory(config.translateCategory(genre));
+             }
+         }
+     }
+ }
- /*
- * {"db_id":"12436404", "titel":"RTL Boulevard", "datum":"2012-03-30",
- * "btijd":"23:45:00", "etijd":"00:40:00", "synop":
- * "Amusementsprogramma Actualiteiten, vermaak en opinies met \u00e9\u00e9n of twee deskundigen, gasten of andere nieuwsmakers. In hoog tempo volgen afwisselende items en reportages elkaar op met de thema's showbizz, crime, royalty en lifestyle.<br><br>"
- * , "kijkwijzer":"", "genre":"Amusement",
- * "presentatie":"Winston Gerschtanowitz, Albert Verlinde",
- * "acteursnamen_rolverdeling":"", "regisseur":"", "zender_id":"4"}
- */
- /*
- private void fillJSONDetails(String id, Programme result) throws Exception {
- URL url = JSONDetailUrl(id);
- JSONObject json = fetchJSON(url);
- Set<String> keys = json.keySet();
- for (String key : keys) {
- String value = StringEscapeUtils.unescapeHtml(json.getString(key));
- if (value.isEmpty())
- continue;
- if (key.equals("synop")) {
- value = value.replaceAll("<br>", " ").replaceAll("<br />", " ")
- .replaceAll("<p[^>]*>", " ").replaceAll("</p>", " ")
- .replaceAll("<strong>", " ")
- .replaceAll("</strong>", " ").replaceAll("<em>", " ")
- .replaceAll("</em>", " ").trim();
- if (value.isEmpty())
- continue;
- result.addDescription(value);
- } else if (key.equals("presentatie")) {
- String[] parts = value.split(",");
- for (String s : parts) {
- result.addPresenter(s.trim());
- }
- } else if (key.equals("acteursnamen_rolverdeling")) {
- // TODO hoe zouden rollen kunnen worden aangegeven? Geen
- // voorbeelden van gezien.
- String[] parts = value.split(",");
- for (String s : parts) {
- result.addActor(s.trim());
- }
- } else if (key.equals("regisseur")) {
- String[] parts = value.split(",");
- for (String s : parts) {
- result.addDirector(s.trim());
- }
- } else if (key.equals("kijkwijzer")) {
- // TODO
- } else if (key.equals("db_id")) {
- // ignore
- } else if (key.equals("titel")) {
- // ignore
- } else if (key.equals("datum")) {
- // ignore
- } else if (key.equals("btijd")) {
- // ignore
- } else if (key.equals("etijd")) {
- // ignore
- } else if (key.equals("genre")) {
- // ignore
- } else if (key.equals("zender_id")) {
- // ignore
- } else {
- logger.warn("Unknown key in ziggogids.nl json details: \"" + key
- + "\"");
- }
- }
- }
-*/
-/*
- private void fillScraperDetails(String id, Programme result)
- throws Exception {
- Pattern progInfoPattern = Pattern.compile(
- "prog-info-content.*prog-info-footer", Pattern.DOTALL);
- Pattern infoLinePattern = Pattern
- .compile("<li><strong>(.*?):</strong>(.*?)</li>");
- Pattern HDPattern = Pattern.compile("HD \\d+[ip]?");
- Pattern kijkwijzerPattern = Pattern
- .compile("<img src=\"http://ziggogidsassets.nl/img/kijkwijzer/.*?\" alt=\"(.*?)\" />");
-
- URL url = HTMLDetailUrl(id);
- String clob = fetchURL(url);
- Matcher m = progInfoPattern.matcher(clob);
- if (m.find()) {
- String progInfo = m.group();
- Matcher m2 = infoLinePattern.matcher(progInfo);
- while (m2.find()) {
- logger.trace(" infoLine: " + m2.group());
- logger.trace(" key: " + m2.group(1));
- logger.trace(" value: " + m2.group(2));
- String key = m2.group(1).toLowerCase();
- String value = m2.group(2);
- if (key.equals("bijzonderheden")) {
- String[] list = value.split(",");
- for (String item : list) {
- if (item.toLowerCase().contains("teletekst")) {
- result.addSubtitle("teletext");
- } else if (item.toLowerCase().contains("breedbeeld")) {
- result.setVideoAspect("16:9");
- } else if (value.toLowerCase().contains("zwart")) {
- result.setVideoColour(false);
- } else if (value.toLowerCase().contains("stereo")) {
- result.setAudioStereo("stereo");
- } else if (value.toLowerCase().contains("herhaling")) {
- result.setPreviouslyShown();
- } else {
- Matcher m3 = HDPattern.matcher(value);
- if (m3.find()) {
- result.setVideoQuality(m3.group());
- } else {
- logger.warn(" Unknown value in 'bijzonderheden': "
- + item);
- }
- }
- }
- } else {
- // ignore other keys for now
- }
- Matcher m3 = kijkwijzerPattern.matcher(progInfo);
- List<String> kijkwijzer = new ArrayList<String>();
- while (m3.find()) {
- kijkwijzer.add(m3.group(1));
- }
- if (!kijkwijzer.isEmpty()) {
- // logger.debug(" kijkwijzer: " + kijkwijzer);
- }
- }
- }
- }
-*/
/**
* @param args
*/
try {
List<Channel> channels = gids.getChannels();
System.out.println("Channels: " + channels);
- /*
+
XMLStreamWriter writer = XMLOutputFactory.newInstance()
.createXMLStreamWriter(new FileWriter("ziggogids.xml"));
writer.writeStartDocument();
writer.writeCharacters("\n");
writer.writeStartElement("tv");
// List<Channel> my_channels = channels;
- List<Channel> my_channels = channels.subList(0, 15);
+ //List<Channel> my_channels = channels.subList(0, 15);
+ List<Channel> my_channels = channels.subList(0, 3);
for (Channel c : my_channels) {
c.serialize(writer, true);
}
System.out.println("Number of fetch errors: "
+ stats.fetchErrors);
}
- */
+
gids.close();
} catch (Exception e) {
logger.error("Error in ziggogids testing", e);