www.vanbest.org Git - tv_grab_nl_java/commitdiff
Work on RTL: sub-titles, subtitles, presenters, urls; debug code to write RTL output...
author Jan-Pascal van Best <janpascal@vanbest.org>
Thu, 29 Mar 2012 15:52:24 +0000 (17:52 +0200)
committer Jan-Pascal van Best <janpascal@vanbest.org>
Thu, 29 Mar 2012 15:52:24 +0000 (17:52 +0200)
.gitignore
pom.xml
src/main/java/org/vanbest/xmltv/Programme.java
src/main/java/org/vanbest/xmltv/RTL.java

index 1293e4dd884e8396f8341504c55a9fd055eb1f0b..9f114ba114ea932df0201c7e0f094c5400e0a55f 100644 (file)
@@ -1,2 +1,4 @@
 /test.xml
+/rtl.xml
 /target
+/testdb.*
diff --git a/pom.xml b/pom.xml
index 2a7aa91dbf54f9985435851aebcd8c1658228498..1a5e167c003f663e70d9ff9cc5d416476e12af41 100644 (file)
--- a/pom.xml
+++ b/pom.xml
                        <artifactId>hsqldb</artifactId>
                        <version>2.2.8</version>
                </dependency>
+               <dependency>
+                       <groupId>mysql</groupId>
+                       <artifactId>mysql-connector-java</artifactId>
+                       <version>5.1.18</version>
+               </dependency>
        </dependencies>
        <build>
                <plugins>
index 2e53b503778bc8344a26b01a0d9d24c2e475a013..08fbc4992535c1c82258c1429933bd1a26e63bc8 100644 (file)
@@ -15,6 +15,10 @@ public class Programme {
        class Title { // a piece of text with an optional language code; also used for categories and subtitle languages
                String title; // the text itself; written as the element's character data when non-null
                String lang; // written as the "lang" attribute when non-null
+           public Title(String title, String lang) { // lets callers build a Title in a single expression
+               this.title = title;
+               this.lang = lang;
+           }
        }
        class Actor {
                String name;
@@ -55,6 +59,10 @@ public class Programme {
                boolean present;
                String stereo; // 'mono','stereo','dolby','dolby digital','bilingual' or 'surround'. 
        }
+       class Subtitle { // one subtitling entry; serialize() writes it as a <subtitles> element
+               String type; // teletext | onscreen | deaf-signed
+               Title language; // optional; written as a nested <language> element, Title.lang becoming its "lang" attribute
+       }
        public Date startTime; // required
        public Date endTime;
     public Date pdcStart;
@@ -65,7 +73,7 @@ public class Programme {
     public String clumpidx;    
     
     public List<Title> titles; // at least one
-    public List<Title> subtitles; 
+    public List<Title> secondaryTitles; 
     public List<Title> descriptions; 
     public Credits credits;
     public Date date; // copyright date, original date
@@ -74,37 +82,88 @@ public class Programme {
     Title origLanguage;
     Length length;
     public List<Icon> icons;
-    public List<URL> urls;
+    public List<String> urls;
     public List<Title> countries;
     public List<Episode> episodes;
     public Video video;
     public Audio audio;
     /*
     previously-shown?, premiere?, last-chance?, new?,
-    subtitles*, rating*, star-rating*, review* 
     */
-    
+    public List<Subtitle> subtitles; 
+    /*rating*, star-rating*, review* 
+    */
+
     public void addTitle(String title) {
        addTitle(title, null);
     }
     public void addTitle(String title, String lang) {
        if(titles==null) titles = new ArrayList<Title>();
-       Title t = new Title();
-       t.title = title;
-       t.lang = lang;
-       titles.add(t);
+       titles.add(new Title(title,lang));
+    }
+       public void addSecondaryTitle(String title) { // convenience overload: secondary title without a language code
+               addSecondaryTitle(title,null);
+       }
+    public void addSecondaryTitle(String title, String lang) { // adds a title that serialize() emits as a <sub-title> element
+       if(secondaryTitles==null) secondaryTitles = new ArrayList<Title>(); // list is created lazily on first use
+       secondaryTitles.add(new Title(title,lang)); // lang may be null, in which case no "lang" attribute is written
     }
+    
        public void addCategory(String category) {
        addCategory(category, null);
     }
     public void addCategory(String category, String lang) {
        if(categories==null) categories = new ArrayList<Title>();
-       Title t = new Title();
-       t.title = category;
-       t.lang = lang;
-       categories.add(t);
+       categories.add(new Title(category,lang));
+    }
+       public void addSubtitle(String type) {
+       addCategory(type, null);
+    }
+    public void addSubtitle(String type, String language, String language_lang) { // records a subtitle entry; language and language_lang may be null
+       if(subtitles==null) subtitles = new ArrayList<Subtitle>(); // list is created lazily on first use
+       Subtitle s = new Subtitle();
+       s.type = type; // stored as given; no validation against the allowed type values happens here
+       if (language != null) { // without a language, s.language stays null and no <language> child is written
+               s.language = new Title(language,language_lang); // language_lang is the lang code the language name is written in
+       }
+       subtitles.add(s);
     }
+       public void addPresenter(String pres) { // adds one presenter name; serialize() writes these as <presenter> inside <credits>
+               if (credits == null) credits = new Credits(); // credits container is created lazily
+               if (credits.presenters==null) { // so is the presenter list inside it
+                       credits.presenters=new ArrayList<String>();
+               }
+               credits.presenters.add(pres); // stored as given; no trimming or de-duplication
+       }
+       public void addUrl(String url) { // adds one URL string; serialize() writes each as a <url> element
+               if(urls==null) urls=new ArrayList<String>(); // list is created lazily on first use
+               urls.add(url); // kept as a plain String; the value is not parsed or validated
+       }
 
+       private void writeTitle(Title title, String tag,
+                       XMLStreamWriter writer) throws XMLStreamException { // writes one Title as <tag lang="...">text</tag>
+               if(title==null) return; // nothing to write for a missing title
+               writer.writeStartElement(tag);
+               if (title.lang != null) writer.writeAttribute("lang", title.lang); // attribute omitted when no language is set
+               if (title.title != null) writer.writeCharacters(title.title); // a null text yields an empty element
+               writer.writeEndElement();
+       }
+       private void writeTitleList(List<Title> titles, String tag,
+                       XMLStreamWriter writer) throws XMLStreamException { // writes every Title in the list as its own <tag> element, in list order
+               if(titles==null) return; // a null list means the field was never populated
+               for(Title title: titles) {
+                       writeTitle(title,tag,writer); // null entries are skipped inside writeTitle
+               }
+       }
+       private void writeStringList(List<String> strings, String tag,
+                       XMLStreamWriter writer) throws XMLStreamException { // writes each string as <tag>string</tag>, in list order
+               if(strings==null) return; // a null list means the field was never populated
+               for(String s:strings) {
+                       writer.writeStartElement(tag);
+                       writer.writeCharacters(s); // NOTE(review): s is not null-checked here, unlike Title.title in writeTitle
+                       writer.writeEndElement();
+               }
+       }
     public void serialize(XMLStreamWriter writer) throws XMLStreamException {
                DateFormat df = new SimpleDateFormat("yyyyMMddHHmmss Z");
 
@@ -112,19 +171,20 @@ public class Programme {
                if(startTime != null) writer.writeAttribute("start", df.format(startTime));
                if(endTime != null) writer.writeAttribute("stop", df.format(endTime));
                if(channel != null) writer.writeAttribute("channel", ""+channel.id);
-               if(titles != null) {
-                       for(Title title: titles) {
-                               writer.writeStartElement("title");
-                               if (title.lang != null) writer.writeAttribute("lang", title.lang);
-                               if (title.title != null) writer.writeCharacters(title.title);
-                               writer.writeEndElement();
-                       }
+               writeTitleList(titles,"title",writer);
+               writeTitleList(secondaryTitles,"sub-title", writer); 
+               if(credits != null) {
+                       writer.writeStartElement("credits");
+                       writeStringList(credits.presenters,"presenter",writer);
+                       writer.writeEndElement();
                }
-               if(categories != null) {
-                       for(Title category: categories) {
-                               writer.writeStartElement("category");
-                               if (category.lang != null) writer.writeAttribute("lang", category.lang);
-                               if (category.title != null) writer.writeCharacters(category.title);
+               writeTitleList(categories, "category", writer);
+               writeStringList(urls,"url",writer);
+               if(subtitles != null) {
+                       for(Subtitle s: subtitles) {
+                               writer.writeStartElement("subtitles");
+                               if (s.type != null) writer.writeAttribute("type", s.type);
+                               if (s.language != null) writeTitle(s.language,"language",writer);
                                writer.writeEndElement();
                        }
                }
@@ -140,5 +200,4 @@ public class Programme {
                writer.writeEndElement();
                writer.writeCharacters(System.getProperty("line.separator"));
        }
-    
 }
index 955dc70cf3b48fa5062edc93e4bc1bad4a0ba26b..891ede22d4b3f037f07970846a800de2adc719ff 100644 (file)
@@ -1,8 +1,11 @@
 package org.vanbest.xmltv;\r
 \r
 import java.io.BufferedReader;\r
+import java.io.FileInputStream;\r
 import java.io.FileNotFoundException;\r
+import java.io.FileWriter;\r
 import java.io.IOException;\r
+import java.io.InputStream;\r
 import java.io.InputStreamReader;\r
 import java.net.MalformedURLException;\r
 import java.net.URL;\r
@@ -21,6 +24,7 @@ import java.util.HashSet;
 import java.util.LinkedList;\r
 import java.util.List;\r
 import java.util.Map;\r
+import java.util.Properties;\r
 import java.util.Set;\r
 \r
 import javax.xml.parsers.DocumentBuilderFactory;\r
@@ -39,6 +43,7 @@ import net.sf.json.JSON;
 import net.sf.json.JSONArray;\r
 import net.sf.json.JSONObject;\r
 \r
+import org.apache.commons.io.FileUtils;\r
 import org.vanbest.xmltv.EPGSource.Stats;\r
 import org.w3c.dom.DOMException;\r
 import org.w3c.dom.Document;\r
@@ -53,7 +58,7 @@ public class RTL extends AbstractEPGSource implements EPGSource  {
        private static final String detail_url="http://www.rtl.nl/active/epg_data/uitzending_data/";\r
        private static final String icon_url="http://www.rtl.nl/service/gids/components/vaste_componenten/";\r
        private static final String xmltv_channel_suffix = ".rtl.nl";\r
-       private static final int MAX_PROGRAMMES_PER_DAY = 99999;\r
+       private static final int MAX_PROGRAMMES_PER_DAY = 200000;\r
        \r
        private Connection db;\r
        \r
@@ -68,25 +73,36 @@ public class RTL extends AbstractEPGSource implements EPGSource  {
                }\r
        }\r
        \r
-       public RTL(Config config) {\r
+       public RTL(Config config, boolean useDB) {\r
                super(config);\r
                try {\r
-                       db = DriverManager.getConnection("jdbc:hsqldb:file:testdb", "SA", "");\r
-                       Statement stat = db.createStatement();\r
-                       StringBuilder s = new StringBuilder();\r
-                       s.append("CREATE TABLE IF NOT EXISTS prog (id VARCHAR(32) primary key, ");\r
-                       int i=0;\r
-                       for( String key: xmlKeys) {\r
-                               if(i>0) s.append(", ");\r
-                               xmlKeyMap.put(key, i+1);\r
-                               s.append(key);\r
-                               s.append(" VARCHAR(4096)");\r
-                               i++;\r
+                       if (useDB) {\r
+                               Properties dbProp = new Properties();\r
+                       try {\r
+                               InputStream in = new FileInputStream("tv_grab_nl_java.db.properties");\r
+                           dbProp.load(in);\r
+                       } catch (IOException e) {\r
+                           e.printStackTrace();\r
+                       }\r
+                       db = DriverManager.getConnection(dbProp.getProperty("db_url"), dbProp.getProperty("db_user"), dbProp.getProperty("db_passwd"));\r
+                               Statement stat = db.createStatement();\r
+                               StringBuilder s = new StringBuilder();\r
+                               s.append("CREATE TABLE IF NOT EXISTS prog (id VARCHAR(32) primary key, ");\r
+                               int i=0;\r
+                               for( String key: xmlKeys) {\r
+                                       if(i>0) s.append(", ");\r
+                                       xmlKeyMap.put(key, i+1);\r
+                                       s.append(key);\r
+                                       s.append(" TEXT");\r
+                                       i++;\r
+                               }\r
+                               s.append(");");\r
+                               System.out.println(s);\r
+                               stat.execute(s.toString());\r
+                               stat.execute("TRUNCATE TABLE prog");\r
+                       } else {\r
+                               db = null;\r
                        }\r
-                       s.append(");");\r
-                       System.out.println(s);\r
-                       stat.execute(s.toString());\r
-                       stat.execute("TRUNCATE TABLE prog");\r
                } catch (SQLException e) {\r
                        // TODO Auto-generated catch block\r
                        e.printStackTrace();\r
@@ -165,21 +181,24 @@ public class RTL extends AbstractEPGSource implements EPGSource  {
                if (root.hasAttributes()) {\r
                        System.out.println("Unknown attributes for RTL detail root node");\r
                }\r
-               StringBuilder sql = new StringBuilder("INSERT INTO prog (id");\r
-               StringBuilder sql2= new StringBuilder(") values (?");\r
-               for(String key:xmlKeys) {\r
-                       sql.append(",");\r
-                       sql.append(key);\r
-                       sql2.append(",");\r
-                       sql2.append("?");\r
-               }\r
-               sql.append(sql2);\r
-               sql.append(");");\r
-               // System.out.println(sql.toString());\r
-               PreparedStatement stat = db.prepareStatement(sql.toString());\r
-               stat.setString(1, id);\r
-               for(String key:xmlKeys) {\r
-                       \r
+               PreparedStatement stat = null;\r
+               if (db != null) {\r
+                       StringBuilder sql = new StringBuilder("INSERT INTO prog (id");\r
+                       StringBuilder sql2= new StringBuilder(") values (?");\r
+                       for(String key:xmlKeys) {\r
+                               sql.append(",");\r
+                               sql.append(key);\r
+                               sql2.append(",");\r
+                               sql2.append("?");\r
+                       }\r
+                       sql.append(sql2);\r
+                       sql.append(");");\r
+                       // System.out.println(sql.toString());\r
+                       stat = db.prepareStatement(sql.toString());\r
+                       stat.setString(1, id);\r
+                       for(String key:xmlKeys) {\r
+                               \r
+                       }\r
                }\r
                NodeList nodes = root.getChildNodes();\r
                for( int i=0; i<nodes.getLength(); i++) {\r
@@ -204,8 +223,10 @@ public class RTL extends AbstractEPGSource implements EPGSource  {
                                        continue;\r
                                }\r
                        }\r
-                       System.out.println(stat.toString());\r
-                       stat.execute();\r
+                       //System.out.println(stat.toString());\r
+                       if (db != null) {\r
+                               stat.execute();\r
+                       }\r
                }\r
        }\r
 \r
@@ -226,8 +247,13 @@ public class RTL extends AbstractEPGSource implements EPGSource  {
                        }\r
                }\r
                Element e = (Element)n;\r
-               stat.setString(xmlKeyMap.get(e.getTagName())+1, e.getTextContent());\r
+               if (db != null) {\r
+                       stat.setString(xmlKeyMap.get(e.getTagName())+1, e.getTextContent());\r
+               }\r
                String tag = e.getTagName();\r
+               if (e.getTextContent().isEmpty()) {\r
+                       return;\r
+               }\r
                if (tag.equals("genre")) {\r
                        prog.addCategory(config.translateCategory(e.getTextContent()));\r
                } else if (tag.equals("eindtijd")) {\r
@@ -235,16 +261,31 @@ public class RTL extends AbstractEPGSource implements EPGSource  {
                } else if (tag.equals("omroep")) {\r
                } else if (tag.equals("kijkwijzer")) {\r
                } else if (tag.equals("presentatie")) {\r
+                       // A\r
+                       // A en B\r
+                       // A, B, C en D\r
+                       String[] presentatoren = e.getTextContent().split(", | en ");\r
+                       for(String pres:presentatoren) {\r
+                               prog.addPresenter(pres);\r
+                       }\r
                } else if (tag.equals("wwwadres")) {\r
+                       prog.addUrl(e.getTextContent());\r
                } else if (tag.equals("alginhoud")) {\r
                } else if (tag.equals("inhoud")) {\r
                } else if (tag.equals("tt_inhoud")) {\r
+                       // ignore, is summary of other fields\r
                } else if (tag.equals("zendernr")) {\r
                } else if (tag.equals("titel")) {\r
                } else if (tag.equals("bijvnwlanden")) {\r
                } else if (tag.equals("afl_titel")) {\r
+                       prog.addSecondaryTitle(e.getTextContent());\r
                } else if (tag.equals("site_path")) {\r
                } else if (tag.equals("ondertiteling")) {\r
+                       if(e.getTextContent().equals("J")) {\r
+                               prog.addSubtitle("teletext");\r
+                       } else {\r
+                               throw new RTLException("Ignoring unknown value \"" + n.getTextContent() + "\" for tag ondertiteling");\r
+                       }\r
                } else if (tag.equals("begintijd")) {\r
                } else if (tag.equals("pgmsoort")) {\r
                } else {\r
@@ -274,7 +315,7 @@ public class RTL extends AbstractEPGSource implements EPGSource  {
                for( Object k: o.keySet()) {\r
                        String id = genericChannelId(k.toString());\r
                        if(!channelMap.containsKey(id)) {\r
-                               System.out.println("Skipping programmes for channel " + id);\r
+                               if (!config.quiet) System.out.println("Skipping programmes for channel " + id);\r
                                continue;\r
                        }\r
                        JSONArray j = (JSONArray) o.get(k);\r
@@ -328,20 +369,19 @@ public class RTL extends AbstractEPGSource implements EPGSource  {
         */\r
        public static void main(String[] args) {\r
                Config config = Config.getDefaultConfig();\r
-               System.exit(0);\r
-               RTL rtl = new RTL(config);\r
+               RTL rtl = new RTL(config, false);\r
                try {\r
                        List<Channel> channels = rtl.getChannels();\r
                        System.out.println("Channels: " + channels);\r
-                       XMLStreamWriter writer = XMLOutputFactory.newInstance().createXMLStreamWriter(System.out);\r
-                       \r
+                       XMLStreamWriter writer = XMLOutputFactory.newInstance().createXMLStreamWriter(new FileWriter("rtl.xml"));\r
                        writer.writeStartDocument();\r
                        writer.writeCharacters("\n");\r
                        writer.writeDTD("<!DOCTYPE tv SYSTEM \"xmltv.dtd\">");\r
                        writer.writeCharacters("\n");\r
                        writer.writeStartElement("tv");\r
                        for(Channel c: channels) {c.serialize(writer);}\r
-                       //List<Programme> programmes = rtl.getProgrammes1(channels.subList(0, 13), 0, true);\r
+                       writer.flush();\r
+                       //List<Programme> programmes = rtl.getProgrammes1(channels.subList(6, 9), 0, true);\r
                        List<Programme> programmes = rtl.getProgrammes1(channels, 0, true);\r
                        for(Programme p: programmes) {p.serialize(writer);}\r
                        writer.writeEndElement();\r