www.vanbest.org Git - tv_grab_nl_java/commitdiff
ZiggoGids working more or less (no ratings)
author Jan-Pascal van Best <janpascal@vanbest.org>
Mon, 23 Dec 2013 21:09:17 +0000 (22:09 +0100)
committer Jan-Pascal van Best <janpascal@vanbest.org>
Mon, 23 Dec 2013 21:09:17 +0000 (22:09 +0100)
src/main/java/org/vanbest/xmltv/Programme.java
src/main/java/org/vanbest/xmltv/ZiggoGids.java

index 884ecd3cbe03e7fc199c2d0f953d55e17130e2f7..232cde8e60a2600396a4f53232f5d22a8b12ae3e 100644 (file)
@@ -152,6 +152,10 @@ public class Programme implements Serializable {
                descriptions.add(new Title(title, lang));
        }
 
+        /**
+         * Reports whether at least one description has been added to this
+         * programme.
+         *
+         * @return {@code true} if the description list exists and is not empty;
+         *         {@code false} if it is {@code null} or empty
+         */
+        public boolean hasDescription() {
+            // The previous expression was inverted: it answered "true" exactly
+            // when there was NO description.
+            return descriptions != null && !descriptions.isEmpty();
+        }
+
        public void addEpisode(String episode, String system) {
                if (episodes == null)
                        episodes = new ArrayList<Episode>();
index b1a20091dd8e770a18452d92a0f7f106679a177b..d4825707d472a028188256321ce1b45506a664b6 100644 (file)
@@ -25,6 +25,9 @@ import java.net.URL;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.Date;
+import java.util.GregorianCalendar;
+import java.util.Calendar;
 import java.util.List;
 import java.util.Locale;
 import java.util.Set;
@@ -70,6 +73,7 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource {
 
        private static final int MAX_PROGRAMMES_PER_DAY = 9999;
        private static final int MAX_DAYS_AHEAD_SUPPORTED_BY_ZIGGOGIDS = 3;
+        private static final int MAX_CHANNELS_PER_REQUEST = 25;
 
        public static String NAME = "ziggogids.nl";
 
@@ -83,40 +87,28 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource {
                return NAME;
        }
 
-       public static URL programmeUrl(List<Channel> channels, int day)
+       public static String programmeUrl(int day, int hour)
                        throws Exception {
                StringBuilder s = new StringBuilder(programme_base_url);
-               if (channels.size() < 1) {
-                       throw new Exception("should have at least one channel");
-               }
-               s.append("?channels=");
-               boolean first = true;
-               for (Channel i : channels) {
-                       if (first) {
-                               s.append(i.id);
-                               first = false;
-                       } else {
-                               s.append("," + i.id);
-                       }
-               }
-               s.append("&day=");
-               s.append(day);
-
-               return new URL(s.toString());
+               s.append("/");
+                GregorianCalendar cal = new GregorianCalendar();
+                cal.add(Calendar.DAY_OF_MONTH, day);
+                cal.set(Calendar.HOUR_OF_DAY, hour);
+                cal.set(Calendar.MINUTE, 0);
+                String date = new SimpleDateFormat("yyyyMMdd'T'HHmm").format(cal.getTime());
+               s.append(date);
+
+               return s.toString();
        }
 
-       public static URL DetailUrl(String id) throws Exception {
+       public static String detailUrl(String id) {
                StringBuilder s = new StringBuilder(detail_base_url);
-               s.append("?id=");
+               s.append("/typefav=false?progid=");
                s.append(id);
-               return new URL(s.toString());
+               return s.toString();
        }
 
 
-        private void setActiveChannel(CloseableHttpClient client, String channel) throws IOException {
-            setActiveChannels(client, Collections.singletonList(channel));
-        }
-
         private Document fetchJsoup(CloseableHttpClient client, String url) throws IOException {
             Document doc = null;
 
@@ -133,6 +125,10 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource {
             return doc;
         }
 
+        /**
+         * Convenience wrapper that hands a single channel to
+         * {@code setActiveChannels} as a one-element list.
+         *
+         * @param client HTTP client passed through to {@code setActiveChannels}
+         * @param channel the one channel to pass on
+         * @throws IOException propagated from {@code setActiveChannels}
+         */
+        private void setActiveChannel(CloseableHttpClient client, String channel) throws IOException {
+            setActiveChannels(client, Collections.singletonList(channel));
+        }
+
         private void setActiveChannels(CloseableHttpClient client, List<String> channels) throws IOException {
             Document doc;
             try { 
@@ -162,7 +158,7 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource {
         {
             setActiveChannel(client, channel);
 
-            String url = programme_base_url+"/2013-12-24T1615";
+            String url = programme_base_url+"/";
             Document doc = fetchJsoup(client, url);
 
             // logger.debug("ziggogids programme: " + doc.outerHtml());
@@ -206,6 +202,7 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource {
                     String name = e.select("label").first().text();
                     logger.debug("    "+index+": \""+name+"\"");
                    Channel c = Channel.getChannel(getId(), index, name);
+                    /* Too slow for now
                     try {
                         String icon = fetchIconUrl(httpclient, index);
                         logger.debug("    "+icon);
@@ -213,6 +210,7 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource {
                     } catch (IOException e2) {
                         logger.error("IO Exception trying to get channel log for channel "+index, e2);
                     }
+                    */
                    result.add(c);
                 }
                return result;
@@ -232,270 +230,94 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource {
                        return result; // empty list
                }
 
-               URL url = programmeUrl(channels, day);
-
-               JSONObject jsonObject = fetchJSON(url);
+                CloseableHttpClient httpclient = HttpClients.createDefault();
 
                for (Channel c : channels) {
-                       JSON ps = (JSON) jsonObject.get(c.id);
-                       if (ps.isArray()) {
-                               JSONArray programs = (JSONArray) ps;
-                               for (int i = 0; i < programs.size()
-                                               && i < MAX_PROGRAMMES_PER_DAY; i++) {
-                                       JSONObject programme = programs.getJSONObject(i);
-                                       Programme p = programmeFromJSON(programme,
-                                                       config.fetchDetails);
-                                       p.channel = c.getXmltvChannelId();
-                                       result.add(p);
-                               }
-                       } else {
-                               JSONObject programs = (JSONObject) ps;
-                               int count = 0;
-                               for (Object o : programs.keySet()) {
-                                       if (count > MAX_PROGRAMMES_PER_DAY)
-                                               break;
-                                       JSONObject programme = programs.getJSONObject(o.toString());
-                                       Programme p = programmeFromJSON(programme,
-                                                       config.fetchDetails);
-                                       p.channel = c.getXmltvChannelId();
-                                       result.add(p);
-                                       count++;
-                               }
-                       }
-               }
+                    setActiveChannel(httpclient, c.id);
 
-               return result;
-       }
+                    String url = programmeUrl(day, 20); // hour
+                    logger.debug("url: "+url);
 
-       /*
-        * {"4": [{"db_id":"12436404", "titel":"RTL Boulevard", "genre":"Amusement",
-        * "soort":"Amusementsprogramma", "kijkwijzer":"", "artikel_id":null,
-        * "datum_start":"2012-03-30 23:45:00", "datum_end":"2012-03-31 00:40:00"},
-        * {"db_id":"12436397","titel":"Teleshop 4","genre":"Overige","soort":
-        * "Homeshopping"
-        * ,"kijkwijzer":"","artikel_id":null,"datum_start":"2012-03-31 00:40:00"
-        * ,"datum_end":"2012-03-31 00:41:00"},
-        * {"db_id":"12436398","titel":"Cupido TV"
-        * ,"genre":"Overige","soort":"","kijkwijzer"
-        * :"","artikel_id":null,"datum_start"
-        * :"2012-03-31 00:41:00","datum_end":"2012-03-31 04:30:00"},
-        * {"db_id":"12436399"
-        * ,"titel":"Morning chat","genre":"Overige","soort":"","kijkwijzer"
-        * :"","artikel_id"
-        * :null,"datum_start":"2012-03-31 04:30:00","datum_end":"2012-03-31 06:00:00"
-        * }, ....... ]}
-        */
-       private Programme programmeFromJSON(JSONObject programme,
-                       boolean fetchDetails) throws Exception {
-               String id = programme.getString("db_id");
-               Programme result = cache.get(getId(), id);
-               boolean cached = (result != null);
-               if (result == null) {
-                       stats.cacheMisses++;
-                       result = new Programme();
-                       // Do this here, because we can only add to these fields. Pity if
-                       // they're updated
-                       result.addTitle(programme.getString("titel"));
-                       String genre = programme.getString("genre");
-                       if (genre != null && !genre.isEmpty())
-                               result.addCategory(config.translateCategory(genre));
-                       String kijkwijzer = programme.getString("kijkwijzer");
-                       if (kijkwijzer != null && !kijkwijzer.isEmpty()) {
-                               List<String> list = parseKijkwijzer(kijkwijzer);
-                               if (config.joinKijkwijzerRatings) {
-                                       // mythtv doesn't understand multiple <rating> tags
-                                       result.addRating("kijkwijzer", StringUtils.join(list, ","));
-                               } else {
-                                       for (String rating : list) {
-                                               result.addRating("kijkwijzer", rating);
-                                       }
-                               }
-                               // TODO add icon from HTML detail page
-                       }
-               } else {
-                       // System.out.println("From cache: " +
-                       // programme.getString("titel"));
-                       stats.cacheHits++;
-               }
-               logger.trace("      titel:" + programme.getString("titel"));
-               logger.trace("datum_start:" + programme.getString("datum_start"));
-               logger.trace("  datum_end:" + programme.getString("datum_end"));
-               logger.trace("      genre:" + programme.getString("genre"));
-               SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss",
-                               new Locale("nl"));
-               result.startTime = df.parse(programme.getString("datum_start"));
-               result.endTime = df.parse(programme.getString("datum_end"));
-               // TODO other fields
-
-               if (fetchDetails && !cached) {
-                       // TODO also read details if those have not been cached
-                       fillDetails(id, result);
-               }
-               if (!cached) {
-                       // FIXME where to do this?
-                       cache.put(getId(), id, result);
-               }
-               logger.debug(result);
+                    Document doc;
+                    try {
+                        doc = fetchJsoup(httpclient, url);
+                    } catch (IOException e) {
+                        logger.error("IO Exception trying to get ziggo channel list from "+url, e);
+                        return result;
+                    }
+
+//                    logger.debug("ziggogids programme: " + doc.outerHtml());
+
+                    Elements rows = doc.select(".gids-item-row");
+                    for(Element row: rows) {
+                        logger.debug("*** row ***");
+                        for(Element item: row.select(".gids-row-item")) {
+                            Programme p = programmeFromElement(httpclient, item);
+                            p.channel = c.getXmltvChannelId();
+                            result.add(p);
+                            logger.debug(p.toString());
+                        }
+                    }
+                }
                return result;
        }
 
-       private void fillDetails(String id, Programme result) throws Exception {
-               try {
-                       //fillJSONDetails(id, result);
-               } catch (Exception e) {
-                       logger.warn("Error fetching details for programme "
-                                       + result.toString());
-               }
-               try {
-                       //fillScraperDetails(id, result);
-               } catch (Exception e) {
-                       logger.warn("Error fetching details for programme "
-                                       + result.toString());
-               }
+        /**
+         * Builds a {@link Programme} from one programme element of the guide
+         * page, reusing a cached instance when one exists.
+         *
+         * The cache key is the start time joined to the {@code popup-id}
+         * attribute with an underscore. Start and end times are always
+         * refreshed from the element; the title is read only for programmes
+         * that were not in the cache.
+         *
+         * @param httpclient HTTP client used if details have to be fetched
+         * @param item element carrying the {@code popup-id}, {@code pr-start}
+         *             and {@code pr-duration} attributes
+         * @return the cached or newly created programme
+         */
+        private Programme programmeFromElement(CloseableHttpClient httpclient, Element item) {
+            final String popupId = item.attr("popup-id");
+            final long startSeconds = Long.parseLong(item.attr("pr-start")); // unix time
+            final String cacheKey = startSeconds + "_" + popupId;
+
+            Programme programme = cache.get(getId(), cacheKey);
+            final boolean fromCache = (programme != null);
+            if (fromCache) {
+                stats.cacheHits++;
+            } else {
+                stats.cacheMisses++;
+                programme = new Programme();
+                programme.addTitle(item.select(".gids-row-item-title").text());
+            }
+
+            programme.startTime = new Date(startSeconds * 1000L);
+            final long durationMinutes = Integer.parseInt(item.attr("pr-duration")); // minutes
+            final long endSeconds = startSeconds + durationMinutes * 60;
+            programme.endTime = new Date(endSeconds * 1000L);
+
+            // Details are fetched only when enabled, and then for programmes
+            // that were not cached or for which hasDescription() is false.
+            if (config.fetchDetails) {
+                if (!fromCache || !programme.hasDescription()) {
+                    fillDetails(httpclient, programme, popupId);
+                }
+            }
+
+            if (!fromCache) {
+                // FIXME where to do this?
+                cache.put(getId(), cacheKey, programme);
+            }
+            return programme;
+        }
 
-               if ((result.secondaryTitles == null || result.secondaryTitles.isEmpty())
-                               && (!result.hasCategory("movies") && !result
-                                               .hasCategory("film"))) {
-                       for (Programme.Title t : result.titles) {
-                               String[] parts = t.title.split("\\s*:\\s*", 2);
-                               if (parts.length >= 2 && parts[0].length() >= 5) {
-                                       logger.debug("Splitting title from \"" + t.title
-                                                       + "\" to: \"" + parts[0].trim()
-                                                       + "\"; sub-title: \"" + parts[1].trim() + "\"");
-                                       t.title = parts[0].trim();
-                                       result.addSecondaryTitle(parts[1].trim());
-                               }
-                       }
-               }
-       }
+        /**
+         * Fetches the detail page for a programme and copies the description
+         * and genre found there onto {@code p}.
+         *
+         * An I/O failure while fetching is logged and leaves {@code p}
+         * untouched. Kijkwijzer ratings are not parsed yet.
+         *
+         * @param httpclient HTTP client used to fetch the detail page
+         * @param p programme to enrich
+         * @param progid programme id used to build the detail URL
+         */
+        private void fillDetails(CloseableHttpClient httpclient, Programme p, String progid) {
+            Document doc;
+            String url = detailUrl(progid);
+            try {
+                doc = fetchJsoup(httpclient, url);
+            } catch (IOException e) {
+                logger.error("IO Exception trying to get ziggo detail info from "+url, e);
+                return;
+            }
+            //logger.debug("ziggogids detail: " + doc.outerHtml());
+            Element desc = doc.select(".progpop_descr").first();
+            if (desc != null) {
+                p.addDescription(desc.text());
+            }
+
+            // TODO parse ratings from the ".progpop_kijkwijzer" element
+
+            Element time = doc.select(".progpop_time").first();
+            if (time != null) {
+                logger.debug("progpop_time: "+time.text());
+                // Drop everything up to and including the first comma; what
+                // remains is treated as the genre.
+                String genre = time.text().replaceFirst("^[^,]+,","").trim();
+                logger.debug("Genre: " + genre);
+                // Never record a blank category (e.g. text ending in a comma).
+                if (!genre.isEmpty()) {
+                    p.addCategory(config.translateCategory(genre));
+                }
+            }
+        }
 
-       /*
-        * {"db_id":"12436404", "titel":"RTL Boulevard", "datum":"2012-03-30",
-        * "btijd":"23:45:00", "etijd":"00:40:00", "synop":
-        * "Amusementsprogramma Actualiteiten, vermaak en opinies met \u00e9\u00e9n of twee deskundigen, gasten of andere nieuwsmakers. In hoog tempo volgen afwisselende items en reportages elkaar op met de thema's showbizz, crime, royalty en lifestyle.<br><br>"
-        * , "kijkwijzer":"", "genre":"Amusement",
-        * "presentatie":"Winston Gerschtanowitz, Albert Verlinde",
-        * "acteursnamen_rolverdeling":"", "regisseur":"", "zender_id":"4"}
-        */
-         /*
-       private void fillJSONDetails(String id, Programme result) throws Exception {
-               URL url = JSONDetailUrl(id);
-               JSONObject json = fetchJSON(url);
-               Set<String> keys = json.keySet();
-               for (String key : keys) {
-                       String value = StringEscapeUtils.unescapeHtml(json.getString(key));
-                       if (value.isEmpty())
-                               continue;
-                       if (key.equals("synop")) {
-                               value = value.replaceAll("<br>", " ").replaceAll("<br />", " ")
-                                               .replaceAll("<p[^>]*>", " ").replaceAll("</p>", " ")
-                                               .replaceAll("<strong>", " ")
-                                               .replaceAll("</strong>", " ").replaceAll("<em>", " ")
-                                               .replaceAll("</em>", " ").trim();
-                               if (value.isEmpty())
-                                       continue;
-                               result.addDescription(value);
-                       } else if (key.equals("presentatie")) {
-                               String[] parts = value.split(",");
-                               for (String s : parts) {
-                                       result.addPresenter(s.trim());
-                               }
-                       } else if (key.equals("acteursnamen_rolverdeling")) {
-                               // TODO hoe zouden rollen kunnen worden aangegeven? Geen
-                               // voorbeelden van gezien.
-                               String[] parts = value.split(",");
-                               for (String s : parts) {
-                                       result.addActor(s.trim());
-                               }
-                       } else if (key.equals("regisseur")) {
-                               String[] parts = value.split(",");
-                               for (String s : parts) {
-                                       result.addDirector(s.trim());
-                               }
-                       } else if (key.equals("kijkwijzer")) {
-                               // TODO
-                       } else if (key.equals("db_id")) {
-                               // ignore
-                       } else if (key.equals("titel")) {
-                               // ignore
-                       } else if (key.equals("datum")) {
-                               // ignore
-                       } else if (key.equals("btijd")) {
-                               // ignore
-                       } else if (key.equals("etijd")) {
-                               // ignore
-                       } else if (key.equals("genre")) {
-                               // ignore
-                       } else if (key.equals("zender_id")) {
-                               // ignore
-                       } else {
-                               logger.warn("Unknown key in ziggogids.nl json details: \"" + key
-                                               + "\"");
-                       }
-               }
-       }
-*/
-/*
-       private void fillScraperDetails(String id, Programme result)
-                       throws Exception {
-               Pattern progInfoPattern = Pattern.compile(
-                               "prog-info-content.*prog-info-footer", Pattern.DOTALL);
-               Pattern infoLinePattern = Pattern
-                               .compile("<li><strong>(.*?):</strong>(.*?)</li>");
-               Pattern HDPattern = Pattern.compile("HD \\d+[ip]?");
-               Pattern kijkwijzerPattern = Pattern
-                               .compile("<img src=\"http://ziggogidsassets.nl/img/kijkwijzer/.*?\" alt=\"(.*?)\" />");
-
-               URL url = HTMLDetailUrl(id);
-               String clob = fetchURL(url);
-               Matcher m = progInfoPattern.matcher(clob);
-               if (m.find()) {
-                       String progInfo = m.group();
-                       Matcher m2 = infoLinePattern.matcher(progInfo);
-                       while (m2.find()) {
-                               logger.trace("    infoLine: " + m2.group());
-                               logger.trace("         key: " + m2.group(1));
-                               logger.trace("       value: " + m2.group(2));
-                               String key = m2.group(1).toLowerCase();
-                               String value = m2.group(2);
-                               if (key.equals("bijzonderheden")) {
-                                       String[] list = value.split(",");
-                                       for (String item : list) {
-                                               if (item.toLowerCase().contains("teletekst")) {
-                                                       result.addSubtitle("teletext");
-                                               } else if (item.toLowerCase().contains("breedbeeld")) {
-                                                       result.setVideoAspect("16:9");
-                                               } else if (value.toLowerCase().contains("zwart")) {
-                                                       result.setVideoColour(false);
-                                               } else if (value.toLowerCase().contains("stereo")) {
-                                                       result.setAudioStereo("stereo");
-                                               } else if (value.toLowerCase().contains("herhaling")) {
-                                                       result.setPreviouslyShown();
-                                               } else {
-                                                       Matcher m3 = HDPattern.matcher(value);
-                                                       if (m3.find()) {
-                                                               result.setVideoQuality(m3.group());
-                                                       } else {
-                                                               logger.warn("  Unknown value in 'bijzonderheden': "
-                                                                               + item);
-                                                       }
-                                               }
-                                       }
-                               } else {
-                                       // ignore other keys for now
-                               }
-                               Matcher m3 = kijkwijzerPattern.matcher(progInfo);
-                               List<String> kijkwijzer = new ArrayList<String>();
-                               while (m3.find()) {
-                                       kijkwijzer.add(m3.group(1));
-                               }
-                               if (!kijkwijzer.isEmpty()) {
-                                       // logger.debug("    kijkwijzer: " + kijkwijzer);
-                               }
-                       }
-               }
-       }
-*/
        /**
         * @param args
         */
@@ -506,7 +328,7 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource {
                try {
                        List<Channel> channels = gids.getChannels();
                        System.out.println("Channels: " + channels);
-                        /*
+                        
                        XMLStreamWriter writer = XMLOutputFactory.newInstance()
                                        .createXMLStreamWriter(new FileWriter("ziggogids.xml"));
                        writer.writeStartDocument();
@@ -515,7 +337,8 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource {
                        writer.writeCharacters("\n");
                        writer.writeStartElement("tv");
                        // List<Channel> my_channels = channels;
-                       List<Channel> my_channels = channels.subList(0, 15);
+                       //List<Channel> my_channels = channels.subList(0, 15);
+                       List<Channel> my_channels = channels.subList(0, 3);
                        for (Channel c : my_channels) {
                                c.serialize(writer, true);
                        }
@@ -536,7 +359,7 @@ public class ZiggoGids extends AbstractEPGSource implements EPGSource {
                                System.out.println("Number of fetch errors: "
                                                + stats.fetchErrors);
                        }
-                        */
+                        
                        gids.close();
                } catch (Exception e) {
                        logger.error("Error in ziggogids testing", e);