From: Jan-Pascal van Best Date: Mon, 23 Dec 2013 21:09:17 +0000 (+0100) Subject: ZiggoGids working more or less (no ratings) X-Git-Tag: 1.5.0~2 X-Git-Url: http://www.vanbest.org/gitweb/?a=commitdiff_plain;h=5c8f8c4ebb48e6e284ef6de4d80ed38636b10db2;p=tv_grab_nl_java ZiggoGids working more or less (no ratings) --- diff --git a/src/main/java/org/vanbest/xmltv/Programme.java b/src/main/java/org/vanbest/xmltv/Programme.java index 884ecd3..232cde8 100644 --- a/src/main/java/org/vanbest/xmltv/Programme.java +++ b/src/main/java/org/vanbest/xmltv/Programme.java @@ -152,6 +152,10 @@ public class Programme implements Serializable { descriptions.add(new Title(title, lang)); } + public boolean hasDescription() { + return (descriptions == null || descriptions.size()==0 ); + } + public void addEpisode(String episode, String system) { if (episodes == null) episodes = new ArrayList(); diff --git a/src/main/java/org/vanbest/xmltv/ZiggoGids.java b/src/main/java/org/vanbest/xmltv/ZiggoGids.java index b1a2009..d482570 100644 --- a/src/main/java/org/vanbest/xmltv/ZiggoGids.java +++ b/src/main/java/org/vanbest/xmltv/ZiggoGids.java @@ -25,6 +25,9 @@ import java.net.URL; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collections; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.Calendar; import java.util.List; import java.util.Locale; import java.util.Set; @@ -70,6 +73,7 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource { private static final int MAX_PROGRAMMES_PER_DAY = 9999; private static final int MAX_DAYS_AHEAD_SUPPORTED_BY_ZIGGOGIDS = 3; + private static final int MAX_CHANNELS_PER_REQUEST = 25; public static String NAME = "ziggogids.nl"; @@ -83,40 +87,28 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource { return NAME; } - public static URL programmeUrl(List channels, int day) + public static String programmeUrl(int day, int hour) throws Exception { StringBuilder s = new StringBuilder(programme_base_url); - if (channels.size() < 1) { - throw new Exception("should have at least one channel"); - } - s.append("?channels="); - boolean first = true; - for (Channel i : channels) { - if (first) { - s.append(i.id); - first = false; - } else { - s.append("," + i.id); - } - } - s.append("&day="); - s.append(day); - - return new URL(s.toString()); + s.append("/"); + GregorianCalendar cal = new GregorianCalendar(); + cal.add(Calendar.DAY_OF_MONTH, day); + cal.set(Calendar.HOUR_OF_DAY, hour); + cal.set(Calendar.MINUTE, 0); + String date = new SimpleDateFormat("yyyyMMdd'T'HHmm").format(cal.getTime()); + s.append(date); + + return s.toString(); } - public static URL DetailUrl(String id) throws Exception { + public static String detailUrl(String id) { StringBuilder s = new StringBuilder(detail_base_url); - s.append("?id="); + s.append("/typefav=false?progid="); s.append(id); - return new URL(s.toString()); + return s.toString(); } - private void setActiveChannel(CloseableHttpClient client, String channel) throws IOException { - setActiveChannels(client, Collections.singletonList(channel)); - } - private Document fetchJsoup(CloseableHttpClient client, String url) throws IOException { Document doc = null; @@ -133,6 +125,10 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource { return doc; } + private void setActiveChannel(CloseableHttpClient client, String channel) throws IOException { + setActiveChannels(client, Collections.singletonList(channel)); + } + private void setActiveChannels(CloseableHttpClient client, List channels) throws IOException { Document doc; try { @@ -162,7 +158,7 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource { { setActiveChannel(client, channel); - String url = programme_base_url+"/2013-12-24T1615"; + String url = programme_base_url+"/"; Document doc = fetchJsoup(client, url); // logger.debug("ziggogids programme: " + doc.outerHtml()); @@ -206,6 +202,7 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource { String name = e.select("label").first().text(); logger.debug(" "+index+": \""+name+"\""); Channel c = Channel.getChannel(getId(), index, name); + /* Too slow for now try { String icon = fetchIconUrl(httpclient, index); logger.debug(" "+icon); @@ -213,6 +210,7 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource { } catch (IOException e2) { logger.error("IO Exception trying to get channel log for channel "+index, e2); } + */ result.add(c); } return result; @@ -232,270 +230,94 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource { return result; // empty list } - URL url = programmeUrl(channels, day); - - JSONObject jsonObject = fetchJSON(url); + CloseableHttpClient httpclient = HttpClients.createDefault(); for (Channel c : channels) { - JSON ps = (JSON) jsonObject.get(c.id); - if (ps.isArray()) { - JSONArray programs = (JSONArray) ps; - for (int i = 0; i < programs.size() - && i < MAX_PROGRAMMES_PER_DAY; i++) { - JSONObject programme = programs.getJSONObject(i); - Programme p = programmeFromJSON(programme, - config.fetchDetails); - p.channel = c.getXmltvChannelId(); - result.add(p); - } - } else { - JSONObject programs = (JSONObject) ps; - int count = 0; - for (Object o : programs.keySet()) { - if (count > MAX_PROGRAMMES_PER_DAY) - break; - JSONObject programme = programs.getJSONObject(o.toString()); - Programme p = programmeFromJSON(programme, - config.fetchDetails); - p.channel = c.getXmltvChannelId(); - result.add(p); - count++; - } - } - } + setActiveChannel(httpclient, c.id); - return result; - } + String url = programmeUrl(day, 20); // hour + logger.debug("url: "+url); - /* - * {"4": [{"db_id":"12436404", "titel":"RTL Boulevard", "genre":"Amusement", - * "soort":"Amusementsprogramma", "kijkwijzer":"", "artikel_id":null, - * "datum_start":"2012-03-30 23:45:00", "datum_end":"2012-03-31 00:40:00"}, - * {"db_id":"12436397","titel":"Teleshop 4","genre":"Overige","soort": - * "Homeshopping" - * ,"kijkwijzer":"","artikel_id":null,"datum_start":"2012-03-31 00:40:00" - * ,"datum_end":"2012-03-31 00:41:00"}, - * {"db_id":"12436398","titel":"Cupido TV" - * ,"genre":"Overige","soort":"","kijkwijzer" - * :"","artikel_id":null,"datum_start" - * :"2012-03-31 00:41:00","datum_end":"2012-03-31 04:30:00"}, - * {"db_id":"12436399" - * ,"titel":"Morning chat","genre":"Overige","soort":"","kijkwijzer" - * :"","artikel_id" - * :null,"datum_start":"2012-03-31 04:30:00","datum_end":"2012-03-31 06:00:00" - * }, ....... ]} - */ - private Programme programmeFromJSON(JSONObject programme, - boolean fetchDetails) throws Exception { - String id = programme.getString("db_id"); - Programme result = cache.get(getId(), id); - boolean cached = (result != null); - if (result == null) { - stats.cacheMisses++; - result = new Programme(); - // Do this here, because we can only add to these fields. Pity if - // they're updated - result.addTitle(programme.getString("titel")); - String genre = programme.getString("genre"); - if (genre != null && !genre.isEmpty()) - result.addCategory(config.translateCategory(genre)); - String kijkwijzer = programme.getString("kijkwijzer"); - if (kijkwijzer != null && !kijkwijzer.isEmpty()) { - List list = parseKijkwijzer(kijkwijzer); - if (config.joinKijkwijzerRatings) { - // mythtv doesn't understand multiple tags - result.addRating("kijkwijzer", StringUtils.join(list, ",")); - } else { - for (String rating : list) { - result.addRating("kijkwijzer", rating); - } - } - // TODO add icon from HTML detail page - } - } else { - // System.out.println("From cache: " + - // programme.getString("titel")); - stats.cacheHits++; - } - logger.trace(" titel:" + programme.getString("titel")); - logger.trace("datum_start:" + programme.getString("datum_start")); - logger.trace(" datum_end:" + programme.getString("datum_end")); - logger.trace(" genre:" + programme.getString("genre")); - SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", - new Locale("nl")); - result.startTime = df.parse(programme.getString("datum_start")); - result.endTime = df.parse(programme.getString("datum_end")); - // TODO other fields - - if (fetchDetails && !cached) { - // TODO also read details if those have not been cached - fillDetails(id, result); - } - if (!cached) { - // FIXME where to do this? - cache.put(getId(), id, result); - } - logger.debug(result); + Document doc; + try { + doc = fetchJsoup(httpclient, url); + } catch (IOException e) { + logger.error("IO Exception trying to get ziggo channel list from "+url, e); + return result; + } + +// logger.debug("ziggogids programme: " + doc.outerHtml()); + + Elements rows = doc.select(".gids-item-row"); + for(Element row: rows) { + logger.debug("*** row ***"); + for(Element item: row.select(".gids-row-item")) { + Programme p = programmeFromElement(httpclient, item); + p.channel = c.getXmltvChannelId(); + result.add(p); + logger.debug(p.toString()); + } + } + } return result; } - private void fillDetails(String id, Programme result) throws Exception { - try { - //fillJSONDetails(id, result); - } catch (Exception e) { - logger.warn("Error fetching details for programme " - + result.toString()); - } - try { - //fillScraperDetails(id, result); - } catch (Exception e) { - logger.warn("Error fetching details for programme " - + result.toString()); - } + private Programme programmeFromElement(CloseableHttpClient httpclient, Element item) { + String progid = item.attr("popup-id"); + long start = Long.parseLong(item.attr("pr-start")); // unix time + + String id = Long.toString(start)+"_"+progid; + Programme p = cache.get(getId(), id); + boolean cached = (p != null); + if (p == null) { + stats.cacheMisses++; + p = new Programme(); + String description = item.select(".gids-row-item-title").text(); + p.addTitle(description); + } else { + // System.out.println("From cache: " + + // programme.getString("titel")); + stats.cacheHits++; + } + p.startTime = new Date(1000L*start); + long duration = Integer.parseInt(item.attr("pr-duration")); // minutes + p.endTime = new Date(1000L*(start+60*duration)); + if (config.fetchDetails && ( !cached || !p.hasDescription() ) ) { + fillDetails(httpclient, p, progid); + } + if (!cached) { + // FIXME where to do this? + cache.put(getId(), id, p); + } + return p; + } - if ((result.secondaryTitles == null || result.secondaryTitles.isEmpty()) - && (!result.hasCategory("movies") && !result - .hasCategory("film"))) { - for (Programme.Title t : result.titles) { - String[] parts = t.title.split("\\s*:\\s*", 2); - if (parts.length >= 2 && parts[0].length() >= 5) { - logger.debug("Splitting title from \"" + t.title - + "\" to: \"" + parts[0].trim() - + "\"; sub-title: \"" + parts[1].trim() + "\""); - t.title = parts[0].trim(); - result.addSecondaryTitle(parts[1].trim()); - } - } - } - } + private void fillDetails(CloseableHttpClient httpclient, Programme p, String progid) { + Document doc; + String url = detailUrl(progid); + try { + doc = fetchJsoup(httpclient, url); + } catch (IOException e) { + logger.error("IO Exception trying to get ziggo detail info from "+url, e); + return; + } + //logger.debug("ziggogids detail: " + doc.outerHtml()); + Element desc = doc.select(".progpop_descr").first(); + if(desc!=null) p.addDescription(desc.text()); + + Element kijkwijzer = doc.select(".progpop_kijkwijzer").first(); + if(kijkwijzer!=null) { + // TODO + } + Element time = doc.select(".progpop_time").first(); + if(time!=null) { + logger.debug("progpop_time: "+time.text()); + String genre = time.text().replaceFirst("^[^,]+,","").trim(); + logger.debug("Genre: " + genre); + p.addCategory(config.translateCategory(genre)); + } + } - /* - * {"db_id":"12436404", "titel":"RTL Boulevard", "datum":"2012-03-30", - * "btijd":"23:45:00", "etijd":"00:40:00", "synop": - * "Amusementsprogramma Actualiteiten, vermaak en opinies met \u00e9\u00e9n of twee deskundigen, gasten of andere nieuwsmakers. In hoog tempo volgen afwisselende items en reportages elkaar op met de thema's showbizz, crime, royalty en lifestyle.

" - * , "kijkwijzer":"", "genre":"Amusement", - * "presentatie":"Winston Gerschtanowitz, Albert Verlinde", - * "acteursnamen_rolverdeling":"", "regisseur":"", "zender_id":"4"} - */ - /* - private void fillJSONDetails(String id, Programme result) throws Exception { - URL url = JSONDetailUrl(id); - JSONObject json = fetchJSON(url); - Set keys = json.keySet(); - for (String key : keys) { - String value = StringEscapeUtils.unescapeHtml(json.getString(key)); - if (value.isEmpty()) - continue; - if (key.equals("synop")) { - value = value.replaceAll("
", " ").replaceAll("
", " ") - .replaceAll("]*>", " ").replaceAll("

", " ") - .replaceAll("", " ") - .replaceAll("", " ").replaceAll("", " ") - .replaceAll("", " ").trim(); - if (value.isEmpty()) - continue; - result.addDescription(value); - } else if (key.equals("presentatie")) { - String[] parts = value.split(","); - for (String s : parts) { - result.addPresenter(s.trim()); - } - } else if (key.equals("acteursnamen_rolverdeling")) { - // TODO hoe zouden rollen kunnen worden aangegeven? Geen - // voorbeelden van gezien. - String[] parts = value.split(","); - for (String s : parts) { - result.addActor(s.trim()); - } - } else if (key.equals("regisseur")) { - String[] parts = value.split(","); - for (String s : parts) { - result.addDirector(s.trim()); - } - } else if (key.equals("kijkwijzer")) { - // TODO - } else if (key.equals("db_id")) { - // ignore - } else if (key.equals("titel")) { - // ignore - } else if (key.equals("datum")) { - // ignore - } else if (key.equals("btijd")) { - // ignore - } else if (key.equals("etijd")) { - // ignore - } else if (key.equals("genre")) { - // ignore - } else if (key.equals("zender_id")) { - // ignore - } else { - logger.warn("Unknown key in ziggogids.nl json details: \"" + key - + "\""); - } - } - } -*/ -/* - private void fillScraperDetails(String id, Programme result) - throws Exception { - Pattern progInfoPattern = Pattern.compile( - "prog-info-content.*prog-info-footer", Pattern.DOTALL); - Pattern infoLinePattern = Pattern - .compile("
  • (.*?):(.*?)
  • "); - Pattern HDPattern = Pattern.compile("HD \\d+[ip]?"); - Pattern kijkwijzerPattern = Pattern - .compile("\"(.*?)\""); - - URL url = HTMLDetailUrl(id); - String clob = fetchURL(url); - Matcher m = progInfoPattern.matcher(clob); - if (m.find()) { - String progInfo = m.group(); - Matcher m2 = infoLinePattern.matcher(progInfo); - while (m2.find()) { - logger.trace(" infoLine: " + m2.group()); - logger.trace(" key: " + m2.group(1)); - logger.trace(" value: " + m2.group(2)); - String key = m2.group(1).toLowerCase(); - String value = m2.group(2); - if (key.equals("bijzonderheden")) { - String[] list = value.split(","); - for (String item : list) { - if (item.toLowerCase().contains("teletekst")) { - result.addSubtitle("teletext"); - } else if (item.toLowerCase().contains("breedbeeld")) { - result.setVideoAspect("16:9"); - } else if (value.toLowerCase().contains("zwart")) { - result.setVideoColour(false); - } else if (value.toLowerCase().contains("stereo")) { - result.setAudioStereo("stereo"); - } else if (value.toLowerCase().contains("herhaling")) { - result.setPreviouslyShown(); - } else { - Matcher m3 = HDPattern.matcher(value); - if (m3.find()) { - result.setVideoQuality(m3.group()); - } else { - logger.warn(" Unknown value in 'bijzonderheden': " - + item); - } - } - } - } else { - // ignore other keys for now - } - Matcher m3 = kijkwijzerPattern.matcher(progInfo); - List kijkwijzer = new ArrayList(); - while (m3.find()) { - kijkwijzer.add(m3.group(1)); - } - if (!kijkwijzer.isEmpty()) { - // logger.debug(" kijkwijzer: " + kijkwijzer); - } - } - } - } -*/ /** * @param args */ @@ -506,7 +328,7 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource { try { List channels = gids.getChannels(); System.out.println("Channels: " + channels); - /* + XMLStreamWriter writer = XMLOutputFactory.newInstance() .createXMLStreamWriter(new FileWriter("ziggogids.xml")); writer.writeStartDocument(); @@ -515,7 +337,8 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource { writer.writeCharacters("\n"); writer.writeStartElement("tv"); // List my_channels = channels; - List my_channels = channels.subList(0, 15); + //List my_channels = channels.subList(0, 15); + List my_channels = channels.subList(0, 3); for (Channel c : my_channels) { c.serialize(writer, true); } @@ -536,7 +359,7 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource { System.out.println("Number of fetch errors: " + stats.fetchErrors); } - */ + gids.close(); } catch (Exception e) { logger.error("Error in ziggogids testing", e);