From 0ba4613edc766d6efb70aeebfd4cc017fa963f39 Mon Sep 17 00:00:00 2001 From: Jan-Pascal van Best Date: Thu, 29 Mar 2012 17:52:24 +0200 Subject: [PATCH] Work on RTL: sub-titles, subtitles, presenters, urls; debug code to write RTL output to a mysql database for analysis --- .gitignore | 2 + pom.xml | 5 + .../java/org/vanbest/xmltv/Programme.java | 109 ++++++++++++---- src/main/java/org/vanbest/xmltv/RTL.java | 122 ++++++++++++------ 4 files changed, 172 insertions(+), 66 deletions(-) diff --git a/.gitignore b/.gitignore index 1293e4d..9f114ba 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ /test.xml +/rtl.xml /target +/testdb.* diff --git a/pom.xml b/pom.xml index 2a7aa91..1a5e167 100644 --- a/pom.xml +++ b/pom.xml @@ -31,6 +31,11 @@ hsqldb 2.2.8 + + mysql + mysql-connector-java + 5.1.18 + diff --git a/src/main/java/org/vanbest/xmltv/Programme.java b/src/main/java/org/vanbest/xmltv/Programme.java index 2e53b50..08fbc49 100644 --- a/src/main/java/org/vanbest/xmltv/Programme.java +++ b/src/main/java/org/vanbest/xmltv/Programme.java @@ -15,6 +15,10 @@ public class Programme { class Title { String title; String lang; + public Title(String title, String lang) { + this.title = title; + this.lang = lang; + } } class Actor { String name; @@ -55,6 +59,10 @@ public class Programme { boolean present; String stereo; // 'mono','stereo','dolby','dolby digital','bilingual' or 'surround'. 
} + class Subtitle { + String type; // teletext | onscreen | deaf-signed + Title language; + } public Date startTime; // required public Date endTime; public Date pdcStart; @@ -65,7 +73,7 @@ public class Programme { public String clumpidx; public List titles; // at least one - public List<Title> subtitles; + public List<Title> secondaryTitles; public List<Title> descriptions; public Credits credits; public Date date; // copyright date, original date @@ -74,37 +82,88 @@ public class Programme { Title origLanguage; Length length; public List<Icon> icons; - public List<URL> urls; + public List<String> urls; public List<Title> countries; public List<Episode> episodes; public Video video; public Audio audio; /* previously-shown?, premiere?, last-chance?, new?, - subtitles*, rating*, star-rating*, review* */ - + */ public List<Subtitle> subtitles; + /*rating*, star-rating*, review* + */ + public void addTitle(String title) { addTitle(title, null); } public void addTitle(String title, String lang) { if(titles==null) titles = new ArrayList<Title>(); - Title t = new Title(); - t.title = title; - t.lang = lang; - titles.add(t); + titles.add(new Title(title,lang)); + } + public void addSecondaryTitle(String title) { + addSecondaryTitle(title,null); + } + public void addSecondaryTitle(String title, String lang) { + if(secondaryTitles==null) secondaryTitles = new ArrayList<Title>(); + secondaryTitles.add(new Title(title,lang)); } + public void addCategory(String category) { addCategory(category, null); } public void addCategory(String category, String lang) { if(categories==null) categories = new ArrayList<Title>(); - Title t = new Title(); - t.title = category; - t.lang = lang; - categories.add(t); + categories.add(new Title(category,lang)); + } + public void addSubtitle(String type) { + addSubtitle(type, null, null); // subtitle type only, language left unspecified + } + public void addSubtitle(String type, String language, String language_lang) { + if(subtitles==null) subtitles = new ArrayList<Subtitle>(); + Subtitle s = new 
Subtitle(); + s.type = type; + if (language != null) { + s.language = new Title(language,language_lang); + } + subtitles.add(s); } + public void addPresenter(String pres) { + if (credits == null) credits = new Credits(); + if (credits.presenters==null) { + credits.presenters=new ArrayList<String>(); + } + credits.presenters.add(pres); + } + public void addUrl(String url) { + if(urls==null) urls=new ArrayList<String>(); + urls.add(url); + } + private void writeTitle(Title title, String tag, + XMLStreamWriter writer) throws XMLStreamException { + if(title==null) return; + writer.writeStartElement(tag); + if (title.lang != null) writer.writeAttribute("lang", title.lang); + if (title.title != null) writer.writeCharacters(title.title); + writer.writeEndElement(); + } + private void writeTitleList(List<Title> titles, String tag, + XMLStreamWriter writer) throws XMLStreamException { + if(titles==null) return; + for(Title title: titles) { + writeTitle(title,tag,writer); + } + } + private void writeStringList(List<String> strings, String tag, + XMLStreamWriter writer) throws XMLStreamException { + if(strings==null) return; + for(String s:strings) { + writer.writeStartElement(tag); + writer.writeCharacters(s); + writer.writeEndElement(); + } + } public void serialize(XMLStreamWriter writer) throws XMLStreamException { DateFormat df = new SimpleDateFormat("yyyyMMddHHmmss Z"); @@ -112,19 +171,20 @@ public class Programme { if(startTime != null) writer.writeAttribute("start", df.format(startTime)); if(endTime != null) writer.writeAttribute("stop", df.format(endTime)); if(channel != null) writer.writeAttribute("channel", ""+channel.id); - if(titles != null) { - for(Title title: titles) { - writer.writeStartElement("title"); - if (title.lang != null) writer.writeAttribute("lang", title.lang); - if (title.title != null) writer.writeCharacters(title.title); - writer.writeEndElement(); - } + writeTitleList(titles,"title",writer); + writeTitleList(secondaryTitles,"sub-title", 
writer); + if(credits != null) { + writer.writeStartElement("credits"); + writeStringList(credits.presenters,"presenter",writer); + writer.writeEndElement(); } - if(categories != null) { - for(Title category: categories) { - writer.writeStartElement("category"); - if (category.lang != null) writer.writeAttribute("lang", category.lang); - if (category.title != null) writer.writeCharacters(category.title); + writeTitleList(categories, "category", writer); + writeStringList(urls,"url",writer); + if(subtitles != null) { + for(Subtitle s: subtitles) { + writer.writeStartElement("subtitles"); + if (s.type != null) writer.writeAttribute("type", s.type); + if (s.language != null) writeTitle(s.language,"language",writer); writer.writeEndElement(); } } @@ -140,5 +200,4 @@ public class Programme { writer.writeEndElement(); writer.writeCharacters(System.getProperty("line.separator")); } - } diff --git a/src/main/java/org/vanbest/xmltv/RTL.java b/src/main/java/org/vanbest/xmltv/RTL.java index 955dc70..891ede2 100644 --- a/src/main/java/org/vanbest/xmltv/RTL.java +++ b/src/main/java/org/vanbest/xmltv/RTL.java @@ -1,8 +1,11 @@ package org.vanbest.xmltv; import java.io.BufferedReader; +import java.io.FileInputStream; import java.io.FileNotFoundException; +import java.io.FileWriter; import java.io.IOException; +import java.io.InputStream; import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; @@ -21,6 +24,7 @@ import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Properties; import java.util.Set; import javax.xml.parsers.DocumentBuilderFactory; @@ -39,6 +43,7 @@ import net.sf.json.JSON; import net.sf.json.JSONArray; import net.sf.json.JSONObject; +import org.apache.commons.io.FileUtils; import org.vanbest.xmltv.EPGSource.Stats; import org.w3c.dom.DOMException; import org.w3c.dom.Document; @@ -53,7 +58,7 @@ public class RTL extends AbstractEPGSource implements EPGSource { private 
static final String detail_url="http://www.rtl.nl/active/epg_data/uitzending_data/"; private static final String icon_url="http://www.rtl.nl/service/gids/components/vaste_componenten/"; private static final String xmltv_channel_suffix = ".rtl.nl"; - private static final int MAX_PROGRAMMES_PER_DAY = 99999; + private static final int MAX_PROGRAMMES_PER_DAY = 200000; private Connection db; @@ -68,25 +73,36 @@ public class RTL extends AbstractEPGSource implements EPGSource { } } - public RTL(Config config) { + public RTL(Config config, boolean useDB) { super(config); try { - db = DriverManager.getConnection("jdbc:hsqldb:file:testdb", "SA", ""); - Statement stat = db.createStatement(); - StringBuilder s = new StringBuilder(); - s.append("CREATE TABLE IF NOT EXISTS prog (id VARCHAR(32) primary key, "); - int i=0; - for( String key: xmlKeys) { - if(i>0) s.append(", "); - xmlKeyMap.put(key, i+1); - s.append(key); - s.append(" VARCHAR(4096)"); - i++; + if (useDB) { + Properties dbProp = new Properties(); + try { + InputStream in = new FileInputStream("tv_grab_nl_java.db.properties"); + try { dbProp.load(in); } finally { in.close(); } // Properties.load leaves the stream open; always release the file handle + } catch (IOException e) { + e.printStackTrace(); + } + db = DriverManager.getConnection(dbProp.getProperty("db_url"), dbProp.getProperty("db_user"), dbProp.getProperty("db_passwd")); + Statement stat = db.createStatement(); + StringBuilder s = new StringBuilder(); + s.append("CREATE TABLE IF NOT EXISTS prog (id VARCHAR(32) primary key, "); + int i=0; + for( String key: xmlKeys) { + if(i>0) s.append(", "); + xmlKeyMap.put(key, i+1); + s.append(key); + s.append(" TEXT"); + i++; + } + s.append(");"); + System.out.println(s); + stat.execute(s.toString()); + stat.execute("TRUNCATE TABLE prog"); + } else { + db = null; } - s.append(");"); - System.out.println(s); - stat.execute(s.toString()); - stat.execute("TRUNCATE TABLE prog"); } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); @@ -165,21 +181,24 @@ public class RTL extends AbstractEPGSource 
implements EPGSource { if (root.hasAttributes()) { System.out.println("Unknown attributes for RTL detail root node"); } - StringBuilder sql = new StringBuilder("INSERT INTO prog (id"); - StringBuilder sql2= new StringBuilder(") values (?"); - for(String key:xmlKeys) { - sql.append(","); - sql.append(key); - sql2.append(","); - sql2.append("?"); - } - sql.append(sql2); - sql.append(");"); - // System.out.println(sql.toString()); - PreparedStatement stat = db.prepareStatement(sql.toString()); - stat.setString(1, id); - for(String key:xmlKeys) { - + PreparedStatement stat = null; + if (db != null) { + StringBuilder sql = new StringBuilder("INSERT INTO prog (id"); + StringBuilder sql2= new StringBuilder(") values (?"); + for(String key:xmlKeys) { + sql.append(","); + sql.append(key); + sql2.append(","); + sql2.append("?"); + } + sql.append(sql2); + sql.append(");"); + // System.out.println(sql.toString()); + stat = db.prepareStatement(sql.toString()); + stat.setString(1, id); + for(String key:xmlKeys) { + + } } NodeList nodes = root.getChildNodes(); for( int i=0; i<nodes.getLength(); i++) { @@ -204,8 +223,10 @@ public class RTL extends AbstractEPGSource implements EPGSource { continue; } } - System.out.println(stat.toString()); - stat.execute(); + //System.out.println(stat.toString()); + if (db != null) { + stat.execute(); + } } } @@ -226,8 +247,13 @@ public class RTL extends AbstractEPGSource implements EPGSource { } } Element e = (Element)n; - stat.setString(xmlKeyMap.get(e.getTagName())+1, e.getTextContent()); + if (db != null) { + stat.setString(xmlKeyMap.get(e.getTagName())+1, e.getTextContent()); + } String tag = e.getTagName(); + if (e.getTextContent().isEmpty()) { + return; + } if (tag.equals("genre")) { prog.addCategory(config.translateCategory(e.getTextContent())); } else if (tag.equals("eindtijd")) { @@ -235,16 +261,31 @@ public class RTL extends AbstractEPGSource implements EPGSource { } else if (tag.equals("omroep")) { } else if (tag.equals("kijkwijzer")) 
{ } else if (tag.equals("presentatie")) { + // A + // A en B + // A, B, C en D + String[] presentatoren = e.getTextContent().split(", | en "); + for(String pres:presentatoren) { + prog.addPresenter(pres); + } } else if (tag.equals("wwwadres")) { + prog.addUrl(e.getTextContent()); } else if (tag.equals("alginhoud")) { } else if (tag.equals("inhoud")) { } else if (tag.equals("tt_inhoud")) { + // ignore, is summary of other fields } else if (tag.equals("zendernr")) { } else if (tag.equals("titel")) { } else if (tag.equals("bijvnwlanden")) { } else if (tag.equals("afl_titel")) { + prog.addSecondaryTitle(e.getTextContent()); } else if (tag.equals("site_path")) { } else if (tag.equals("ondertiteling")) { + if(e.getTextContent().equals("J")) { + prog.addSubtitle("teletext"); + } else { + throw new RTLException("Ignoring unknown value \"" + n.getTextContent() + "\" for tag ondertiteling"); + } } else if (tag.equals("begintijd")) { } else if (tag.equals("pgmsoort")) { } else { @@ -274,7 +315,7 @@ public class RTL extends AbstractEPGSource implements EPGSource { for( Object k: o.keySet()) { String id = genericChannelId(k.toString()); if(!channelMap.containsKey(id)) { - System.out.println("Skipping programmes for channel " + id); + if (!config.quiet) System.out.println("Skipping programmes for channel " + id); continue; } JSONArray j = (JSONArray) o.get(k); @@ -328,20 +369,19 @@ public class RTL extends AbstractEPGSource implements EPGSource { */ public static void main(String[] args) { Config config = Config.getDefaultConfig(); - System.exit(0); - RTL rtl = new RTL(config); + RTL rtl = new RTL(config, false); try { List<Channel> channels = rtl.getChannels(); System.out.println("Channels: " + channels); - XMLStreamWriter writer = XMLOutputFactory.newInstance().createXMLStreamWriter(System.out); - + XMLStreamWriter writer = XMLOutputFactory.newInstance().createXMLStreamWriter(new FileWriter("rtl.xml")); writer.writeStartDocument(); writer.writeCharacters("\n"); 
writer.writeDTD("<!DOCTYPE tv SYSTEM \"xmltv.dtd\">"); writer.writeCharacters("\n"); writer.writeStartElement("tv"); for(Channel c: channels) {c.serialize(writer);} - //List<Programme> programmes = rtl.getProgrammes1(channels.subList(0, 13), 0, true); + writer.flush(); + //List<Programme> programmes = rtl.getProgrammes1(channels.subList(6, 9), 0, true); List<Programme> programmes = rtl.getProgrammes1(channels, 0, true); for(Programme p: programmes) {p.serialize(writer);} writer.writeEndElement(); -- 2.39.5