From b9a63580eaf55f5c30c1cab5f4975dbe27771317 Mon Sep 17 00:00:00 2001 From: Jan-Pascal van Best Date: Tue, 27 Mar 2012 17:42:14 +0200 Subject: [PATCH] More work on RTL --- src/main/java/org/vanbest/xmltv/Config.java | 7 + .../java/org/vanbest/xmltv/Programme.java | 99 ++++++++- src/main/java/org/vanbest/xmltv/RTL.java | 198 +++++++++++++----- .../org/vanbest/xmltv/TvGidsProgramme.java | 4 +- 4 files changed, 255 insertions(+), 53 deletions(-) diff --git a/src/main/java/org/vanbest/xmltv/Config.java b/src/main/java/org/vanbest/xmltv/Config.java index 5e7bd00..818f1b8 100644 --- a/src/main/java/org/vanbest/xmltv/Config.java +++ b/src/main/java/org/vanbest/xmltv/Config.java @@ -85,6 +85,13 @@ public class Config { return FileUtils.getFile(FileUtils.getUserDirectory(), ".xmltv", "tv_grab_nl_java.cache"); } + public String translateCategory(String category) { + if(!cattrans.containsKey(category.toLowerCase())) { + return category; + } + return cattrans.get(category.toLowerCase()); + } + static private Map getDefaultCattrans() { Map result = new HashMap(); result.put("amusement", "Animated"); diff --git a/src/main/java/org/vanbest/xmltv/Programme.java b/src/main/java/org/vanbest/xmltv/Programme.java index 94c1f21..ddbe0b3 100644 --- a/src/main/java/org/vanbest/xmltv/Programme.java +++ b/src/main/java/org/vanbest/xmltv/Programme.java @@ -1,7 +1,15 @@ package org.vanbest.xmltv; +import java.net.URL; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.ArrayList; import java.util.Date; import java.util.List; +import java.util.concurrent.TimeUnit; + +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; public class Programme { class Title { @@ -24,13 +32,36 @@ public class Programme { List commentators; List guests; } - public Channel channel; // required + class Length { + TimeUnit unit; + int count; + } + class Icon { + URL url; + int width; + int height; + } + class Episode { + String episode; + String system; // onscreen or xmltv_ns + } + class Video { + boolean present; + boolean colour; + String aspect; // eg. 16:9, 4:3 + String quality; // eg. 'HDTV', '800x600'. + } + class Audio { + boolean present; + String stereo; // 'mono','stereo','dolby','dolby digital','bilingual' or 'surround'. + } public Date startTime; // required public Date endTime; public Date pdcStart; public Date vpsStart; public String showview; public String videoplus; + public Channel channel; // required public String clumpidx; public List titles; // at least one @@ -41,6 +72,72 @@ public class Programme { public List<Title> categories; Title language; Title origLanguage; + Length length; + public List<Icon> icons; + public List<URL> urls; + public List<Title> countries; + public List<Episode> episodes; + public Video video; + public Audio audio; + /* + previously-shown?, premiere?, last-chance?, new?, + subtitles*, rating*, star-rating*, review* + */ + public void addTitle(String title) { + addTitle(title, null); + } + public void addTitle(String title, String lang) { + if(titles==null) titles = new ArrayList<Title>(); + Title t = new Title(); + t.title = title; + t.lang = lang; + titles.add(t); + } + public void addCategory(String category) { + addCategory(category, null); + } + public void addCategory(String category, String lang) { + if(categories==null) categories = new ArrayList<Title>(); + Title t = new Title(); + t.title = category; + t.lang = lang; + categories.add(t); + } + + public void serialize(XMLStreamWriter writer) throws XMLStreamException { + DateFormat df = new SimpleDateFormat("yyyyMMddHHmmss Z"); + + writer.writeStartElement("programme"); + if(startTime != null) writer.writeAttribute("start", df.format(startTime)); + if(channel != null) writer.writeAttribute("channel", ""+channel.id); + if(titles != null) { + for(Title title: titles) { + writer.writeStartElement("title"); + if (title.lang != null) writer.writeAttribute("lang", title.lang); + if (title.title != null) writer.writeCharacters(title.title); + writer.writeEndElement(); + } + } + if(categories != null) { + for(Title category: categories) { + writer.writeStartElement("category"); + if (category.lang != null) writer.writeAttribute("lang", category.lang); + if (category.title != null) writer.writeCharacters(category.title); + writer.writeEndElement(); + } + } +/* for(Icon i: icons) { + i.serialize(writer); + } + for(String url: urls) { + writer.writeStartElement("url"); + writer.writeCharacters(url); + writer.writeEndElement(); + } +*/ + writer.writeEndElement(); + writer.writeCharacters(System.getProperty("line.separator")); + } } diff --git a/src/main/java/org/vanbest/xmltv/RTL.java b/src/main/java/org/vanbest/xmltv/RTL.java index e23e8c2..53f6491 100644 --- a/src/main/java/org/vanbest/xmltv/RTL.java +++ b/src/main/java/org/vanbest/xmltv/RTL.java @@ -9,15 +9,25 @@ import java.net.URL; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; +import java.util.Calendar; import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; import javax.xml.stream.XMLEventFactory; import javax.xml.stream.XMLOutputFactory; import javax.xml.stream.XMLStreamWriter; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; import net.sf.json.JSON; import net.sf.json.JSONArray; @@ -26,16 +36,24 @@ import net.sf.json.JSONObject; import org.vanbest.xmltv.EPGSource.Stats; import org.w3c.dom.Document; import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; import org.xml.sax.SAXException; public class RTL extends AbstractEPGSource implements EPGSource { - static final String programme_url="http://www.rtl.nl/active/epg_data/dag_data/"; - static final String detail_url="http://www.rtl.nl/active/epg_data/uitzending_data/"; - static final String icon_url="http://www.rtl.nl/service/gids/components/vaste_componenten/"; + private static final String programme_url="http://www.rtl.nl/active/epg_data/dag_data/"; + private static final String detail_url="http://www.rtl.nl/active/epg_data/uitzending_data/"; + private static final String icon_url="http://www.rtl.nl/service/gids/components/vaste_componenten/"; + private static final String xmltv_channel_suffix = ".rtl.nl"; + private static final int MAX_PROGRAMMES_PER_DAY = 5; + + class RTLException extends Exception { + public RTLException(String s) { + super(s); + } + } - Stats stats = new Stats(); - public RTL(Config config) { super(config); } @@ -62,7 +80,7 @@ public class RTL extends AbstractEPGSource implements EPGSource { JSONObject o = JSONObject.fromObject( json ); for( Object k: o.keySet()) { JSONArray j = (JSONArray) o.get(k); - String id = k.toString().replaceAll("^Z", ""); // remove initial Z + String id = genericChannelId(k.toString()); String name = (String) j.get(0); String icon = icon_url+id+".gif"; @@ -73,34 +91,8 @@ public class RTL extends AbstractEPGSource implements EPGSource { return result; } - protected void fetchDay(int day) throws Exception { - URL url = new URL(programme_url+day); - String xmltext = fetchURL(url); - System.out.println(xmltext); - Document xml = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(url.openStream()); - Element root = xml.getDocumentElement(); - Date date = new SimpleDateFormat("yyyy-MM-dd").parse(root.getAttribute("date")); - System.out.println("date: " + date); - String json = root.getTextContent(); - System.out.println("json: " + json); - JSONObject o = JSONObject.fromObject( json ); - for( Object k: o.keySet()) { - JSONArray j = (JSONArray) o.get(k); - System.out.println(k.toString()+": "+j.toString()); - System.out.println("Channel name:" + j.get(0)); - for (int i=1; i<j.size() && i<3; i++) { - JSONArray p = (JSONArray) j.get(i); - String starttime = p.getString(0); - String title = p.getString(1); - String id = p.getString(2); - String quark1 = p.getString(3); - String quark2 = p.getString(4); - System.out.println(" starttime: " + starttime); - System.out.println(" title: " + title); - System.out.println(" id: " + id); - fetchDetail(id); - } - } + private String genericChannelId(String jsonid) { + return jsonid.replaceAll("^Z", "")+xmltv_channel_suffix; // remove initial Z } /* @@ -128,35 +120,133 @@ public class RTL extends AbstractEPGSource implements EPGSource { * </uitzending_data> */ - private void fetchDetail(String id) throws Exception { - // TODO Auto-generated method stub - URL url = new URL(detail_url+id); - String xmltext = fetchURL(url); - System.out.println(xmltext); + private void fetchDetail(Programme prog, String id) throws Exception { + URL url = detailUrl(id); Document xml = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(url.openStream()); Element root = xml.getDocumentElement(); + if (root.hasAttributes()) { + System.out.println("Unknown attributes for RTL detail root node"); + } + NodeList nodes = root.getChildNodes(); + for( int i=0; i<nodes.getLength(); i++) { + Node n = nodes.item(i); + System.out.println(n.getNodeName()); + if (!n.getNodeName().equals("uitzending_data_item")) { + System.out.println("Ignoring RTL detail, tag " + n.getNodeName() +", full xml:"); + Transformer t = TransformerFactory.newInstance().newTransformer(); + t.transform(new DOMSource(xml),new StreamResult(System.out)); + System.out.println(); + continue; + } + // we have a uitzending_data_item node + NodeList subnodes = n.getChildNodes(); + for( int j=0; j<subnodes.getLength(); j++) { + try { + handleNode(prog, subnodes.item(j)); + } catch (RTLException e) { + System.out.println(e.getMessage()); + Transformer t = TransformerFactory.newInstance().newTransformer(); + t.transform(new DOMSource(xml),new StreamResult(System.out)); + System.out.println(); + continue; + } + } + } } - @Override - public Set<TvGidsProgramme> getProgrammes(List<Channel> channels, int day, + + private void handleNode(Programme prog, Node n) throws RTLException { + if (n.getNodeType() != Node.ELEMENT_NODE) { + throw new RTLException("Ignoring non-element node " + n.getNodeName()); + } + Element e = (Element)n; + switch (e.getTagName()) { + case "genre": + prog.addCategory(config.translateCategory(e.getTextContent())); + break; + default: + throw new RTLException("Ignoring unknown tag " + n.getNodeName() + ", content: \"" + e.getTextContent() + "\""); + } + //prog.endTime = parseTime(date, root.) + } + + public Set<Programme> getProgrammes1(List<Channel> channels, int day, boolean fetchDetails) throws Exception { - // TODO Auto-generated method stub - return null; + Set<Programme> result = new HashSet<Programme>(); + Map<String,Channel> channelMap = new HashMap<String,Channel>(); + for(Channel c: channels) { + if (c.enabled) channelMap.put(c.id, c); + } + URL url = programmeUrl(day); + //String xmltext = fetchURL(url); + //System.out.println(xmltext); + Thread.sleep(config.niceMilliseconds); + Document xml = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(url.openStream()); + Element root = xml.getDocumentElement(); + Date date = new SimpleDateFormat("yyyy-MM-dd").parse(root.getAttribute("date")); + System.out.println("date: " + date); + String json = root.getTextContent(); + System.out.println("json: " + json); + JSONObject o = JSONObject.fromObject( json ); + for( Object k: o.keySet()) { + String id = genericChannelId(k.toString()); + if(!channelMap.containsKey(id)) { + System.out.println("Skipping programmes for channel " + id); + continue; + } + JSONArray j = (JSONArray) o.get(k); + System.out.println(k.toString()+": "+j.toString()); + //System.out.println("Channel name:" + j.get(0)); + for (int i=1; i<j.size() && i<MAX_PROGRAMMES_PER_DAY; i++) { + JSONArray p = (JSONArray) j.get(i); + String starttime = p.getString(0); + String title = p.getString(1); + String programme_id = p.getString(2); + String quark1 = p.getString(3); + String quark2 = p.getString(4); + Programme prog = new Programme(); + prog.addTitle(title); + Date start = parseTime(date, starttime); + prog.startTime = start; + prog.channel = channelMap.get(id); + fetchDetail(prog, programme_id); + result.add(prog); + } + } + return result; } - @Override - public Stats getStats() { - // TODO Auto-generated method stub - return stats; + private Date parseTime(Date date, String time) { + Calendar result = Calendar.getInstance(); + result.setTime(date); + String[] parts = time.split(":"); + if(parts.length != 2) { + + } + int hour = Integer.parseInt(parts[0]); + if (hour<4) { + result.add(Calendar.DAY_OF_MONTH, 1); // early tomorrow morning + } + result.set(Calendar.HOUR_OF_DAY, hour); + result.set(Calendar.MINUTE, Integer.parseInt(parts[1])); + return result.getTime(); + } + + private static URL programmeUrl(int day) throws MalformedURLException { + return new URL(programme_url+day); + } + + private static URL detailUrl(String id) throws Exception { + return new URL(detail_url+id); } /** * @param args */ public static void main(String[] args) { - RTL rtl = new RTL(); + Config config = Config.getDefaultConfig(); + RTL rtl = new RTL(config); try { - // rtl.fetchDay(1); List<Channel> channels = rtl.getChannels(); System.out.println("Channels: " + channels); XMLStreamWriter writer = XMLOutputFactory.newInstance().createXMLStreamWriter(System.out); @@ -167,14 +257,24 @@ public class RTL extends AbstractEPGSource implements EPGSource { writer.writeCharacters("\n"); writer.writeStartElement("tv"); for(Channel c: channels) {c.serialize(writer);} + Set<Programme> programmes = rtl.getProgrammes1(channels.subList(0, 3), 0, true); + for(Programme p: programmes) {p.serialize(writer);} writer.writeEndElement(); writer.writeEndDocument(); writer.flush(); + rtl.close(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } + @Override + public Set<TvGidsProgramme> getProgrammes(List<Channel> channels, int day, + boolean fetchDetails) throws Exception { + // TODO Refactor EPGSource to return Programme instead of TvGidsProgramme + return null; + } + } diff --git a/src/main/java/org/vanbest/xmltv/TvGidsProgramme.java b/src/main/java/org/vanbest/xmltv/TvGidsProgramme.java index c78fa82..5ec1872 100644 --- a/src/main/java/org/vanbest/xmltv/TvGidsProgramme.java +++ b/src/main/java/org/vanbest/xmltv/TvGidsProgramme.java @@ -126,9 +126,7 @@ public class TvGidsProgramme { this.genre = org.apache.commons.lang.StringEscapeUtils.unescapeHtml(genre); this.soort = org.apache.commons.lang.StringEscapeUtils.unescapeHtml(soort); this.highlight_content = org.apache.commons.lang.StringEscapeUtils.unescapeHtml(highlight_content); - if(config.getCategories().containsKey(genre.toLowerCase())) { - genre = config.getCategories().get(genre.toLowerCase()); - } + genre = config.translateCategory(genre); } public String toString() { -- 2.39.5