From: Jan-Pascal van Best Date: Sun, 1 Apr 2012 18:47:09 +0000 (+0200) Subject: Werk aan non-negacy tvgids class en generieke programme class X-Git-Tag: 0.9.0~5 X-Git-Url: http://www.vanbest.org/gitweb/?a=commitdiff_plain;h=2d79ed579a8a572393df0a7da34f196d8350fe8b;p=tv_grab_nl_java Werk aan non-negacy tvgids class en generieke programme class --- diff --git a/src/main/java/org/vanbest/xmltv/Programme.java b/src/main/java/org/vanbest/xmltv/Programme.java index 0bcb6c5..f7cd8d4 100644 --- a/src/main/java/org/vanbest/xmltv/Programme.java +++ b/src/main/java/org/vanbest/xmltv/Programme.java @@ -1,5 +1,9 @@ package org.vanbest.xmltv; +/* TODO + * Only partially implemented. Some fields are not implemented at all; some miss easy functions for adding; + * some aren't written to xmltv format + */ import java.io.Serializable; import java.net.URL; import java.text.DateFormat; @@ -41,18 +45,20 @@ public class Programme implements Serializable { TimeUnit unit; int count; } + /* See separate java source file for the Icon class class Icon implements Serializable { URL url; int width; int height; } + */ class Episode implements Serializable { String episode; String system; // onscreen or xmltv_ns } class Video implements Serializable { - boolean present; - boolean colour; + boolean present = true; // FIXME should be default null + boolean colour = true; // FIXME should be default null String aspect; // eg. 16:9, 4:3 String quality; // eg. 'HDTV', '800x600'. } @@ -60,10 +66,19 @@ public class Programme implements Serializable { boolean present; String stereo; // 'mono','stereo','dolby','dolby digital','bilingual' or 'surround'. } + class PreviouslyShown implements Serializable { + String start; + String channel; + } class Subtitle implements Serializable { String type; // teletext | onscreen | deaf-signed Title language; } + class Rating implements Serializable { + String system; + String value; + List icons; + } public Date startTime; // required public Date endTime; public Date pdcStart; @@ -88,11 +103,13 @@ public class Programme implements Serializable { public List episodes; public Video video; public Audio audio; + public PreviouslyShown previouslyShown; /* - previously-shown?, premiere?, last-chance?, new?, + premiere?, last-chance?, new?, */ public List subtitles; - /*rating*, star-rating*, review* + public List ratings; + /*star-rating*, review* */ public void addTitle(String title) { @@ -109,6 +126,13 @@ public class Programme implements Serializable { if(secondaryTitles==null) secondaryTitles = new ArrayList(); secondaryTitles.add(new Title(title,lang)); } + public void addDescription(String title) { + addDescription(title,null); + } + public void addDescription(String title, String lang) { + if(descriptions==null) descriptions = new ArrayList<Title>(); + descriptions.add(new Title(title,lang)); + } public void addCategory(String category) { addCategory(category, null); @@ -118,7 +142,7 @@ public class Programme implements Serializable { categories.add(new Title(category,lang)); } public void addSubtitle(String type) { - addCategory(type, null); + addSubtitle(type, null, null); } public void addSubtitle(String type, String language, String language_lang) { if(subtitles==null) subtitles = new ArrayList<Subtitle>(); @@ -136,11 +160,65 @@ public class Programme implements Serializable { } credits.presenters.add(pres); } + public void addDirector(String director) { + if (credits == null) credits = new Credits(); + if (credits.directors==null) credits.directors = new ArrayList<String>(); + credits.directors.add(director); + } + public void addActor(String name) { + addActor(name, null); + } + public void addActor(String name, String role) { + if (credits == null) credits = new Credits(); + if (credits.actors==null) credits.actors = new ArrayList<Actor>(); + Actor actor = new Actor(); + actor.name = name; + actor.role = role; + credits.actors.add(actor); + } + public void setVideoAspect(String aspect) { + if (video==null) video = new Video(); + video.aspect = aspect; + } + public void setVideoQuality(String quality) { + if (video==null) video = new Video(); + video.quality = quality; + } + public void setVideoColour(boolean colour) { + if (video==null) video = new Video(); + video.colour = colour; + } + public void setAudioStereo(String stereo) { + if (audio==null) audio = new Audio(); + audio.stereo = stereo; + } public void addUrl(String url) { if(urls==null) urls=new ArrayList<String>(); urls.add(url); } + // Convenience method, set "rerun" flag without any additional information + public void setPreviouslyShown() { + setPreviouslyShown(null, null); + } + public void setPreviouslyShown(String startTime, String channel) { + if (previouslyShown == null) previouslyShown = new PreviouslyShown(); + previouslyShown.start = startTime; + previouslyShown.channel = channel; + } + public boolean hasCategory(String category) { + for(Title t: categories) { + if (t.title.toLowerCase().equals(category)) return true; + } + return false; + } + public void addRating(String system, String value) { + if (ratings==null) ratings = new ArrayList<Rating>(); + Rating r = new Rating(); + r.system = system; + r.value = value; + } + private void writeTitle(Title title, String tag, XMLStreamWriter writer) throws XMLStreamException { if(title==null) return; @@ -156,16 +234,37 @@ public class Programme implements Serializable { writeTitle(title,tag,writer); } } + private void writeString(String s, String tag, + XMLStreamWriter writer) throws XMLStreamException { + if(s==null) return; + writer.writeStartElement(tag); + writer.writeCharacters(s); + writer.writeEndElement(); + } private void writeStringList(List<String> strings, String tag, XMLStreamWriter writer) throws XMLStreamException { if(strings==null) return; for(String s:strings) { - writer.writeStartElement(tag); - writer.writeCharacters(s); + writeString(s, tag, writer); + } + } + private void writeActorList(List<Actor> actors,XMLStreamWriter writer) throws XMLStreamException { + if(actors==null) return; + for(Actor actor: actors) { + writer.writeStartElement("actor"); + if (actor.role!=null) writer.writeAttribute("role", actor.role); + if (actor.name!=null) writer.writeCharacters(actor.name); writer.writeEndElement(); } } - public void serialize(XMLStreamWriter writer) throws XMLStreamException { + private void writeIconList(List<Icon> icons, XMLStreamWriter writer) throws XMLStreamException { + if(icons==null) return; + for(Icon i: icons) { + i.serialize(writer); + } + } + + public void serialize(XMLStreamWriter writer) throws XMLStreamException { DateFormat df = new SimpleDateFormat("yyyyMMddHHmmss Z"); writer.writeStartElement("programme"); @@ -174,13 +273,56 @@ public class Programme implements Serializable { if(channel != null) writer.writeAttribute("channel", ""+channel); writeTitleList(titles,"title",writer); writeTitleList(secondaryTitles,"sub-title", writer); + writeTitleList(descriptions, "desc", writer); if(credits != null) { writer.writeStartElement("credits"); + writeStringList(credits.directors, "director", writer); + writeActorList(credits.actors, writer); writeStringList(credits.presenters,"presenter",writer); writer.writeEndElement(); } writeTitleList(categories, "category", writer); + writeIconList(icons, writer); writeStringList(urls,"url",writer); + if (video!=null) { + writer.writeStartElement("video"); + if(!video.present) { + writer.writeStartElement("present"); + writer.writeCharacters("no"); + writer.writeEndElement(); + } + if (!video.colour) { + writer.writeStartElement("colour"); + writer.writeCharacters("no"); + writer.writeEndElement(); + } + if (video.aspect!=null) { + writer.writeStartElement("aspect"); + writer.writeCharacters(video.aspect); + writer.writeEndElement(); + } + if (video.quality!=null) { + writer.writeStartElement("quality"); + writer.writeCharacters(video.quality); + writer.writeEndElement(); + } + writer.writeEndElement(); + } + if (audio!=null) { + writer.writeStartElement("audio"); + if (audio.stereo!=null) { + writer.writeStartElement("stereo"); + writer.writeCharacters(audio.stereo); + writer.writeEndElement(); + } + writer.writeEndElement(); + } + if(previouslyShown!=null) { + writer.writeStartElement("previously-shown"); + if (previouslyShown.start!=null) writer.writeAttribute("start", previouslyShown.start); + if (previouslyShown.channel!=null) writer.writeAttribute("channel", previouslyShown.channel); + writer.writeEndElement(); + } if(subtitles != null) { for(Subtitle s: subtitles) { writer.writeStartElement("subtitles"); @@ -189,15 +331,16 @@ public class Programme implements Serializable { writer.writeEndElement(); } } -/* for(Icon i: icons) { - i.serialize(writer); - } - for(String url: urls) { - writer.writeStartElement("url"); - writer.writeCharacters(url); - writer.writeEndElement(); + if(ratings != null) { + for(Rating r: ratings) { + writer.writeStartElement("rating"); + if (r.system != null) writer.writeAttribute("system", r.system); + if (r.value != null) writeString(r.value, "value", writer); + writeIconList(icons, writer); + writer.writeEndElement(); + } + } -*/ writer.writeEndElement(); writer.writeCharacters(System.getProperty("line.separator")); } diff --git a/src/main/java/org/vanbest/xmltv/ProgrammeCache.java b/src/main/java/org/vanbest/xmltv/ProgrammeCache.java index 6368707..dda3dde 100644 --- a/src/main/java/org/vanbest/xmltv/ProgrammeCache.java +++ b/src/main/java/org/vanbest/xmltv/ProgrammeCache.java @@ -43,6 +43,7 @@ public class ProgrammeCache { private Config config; private PreparedStatement getStatement; private PreparedStatement putStatement; + private PreparedStatement removeStatement; public ProgrammeCache(Config config) { this.config = config; @@ -54,6 +55,7 @@ public class ProgrammeCache { getStatement = db.prepareStatement("SELECT programme FROM cache WHERE id=?"); putStatement = db.prepareStatement("INSERT INTO cache VALUES (?,?,?)"); + removeStatement = db.prepareStatement("DELETE FROM cache WHERE id=?"); } catch (SQLException e) { db = null; if (!config.quiet) { @@ -69,7 +71,13 @@ public class ProgrammeCache { getStatement.setString(1, id); ResultSet r = getStatement.executeQuery(); if (!r.next()) return null; // not found - return (Programme) r.getObject("programme"); + try { + Programme result = (Programme) r.getObject("programme"); + return result; + } catch (java.sql.SQLDataException e) { + removeCacheEntry(id); + return null; + } } catch (SQLException e) { if (!config.quiet) { e.printStackTrace(); @@ -78,6 +86,16 @@ public class ProgrammeCache { } } + private void removeCacheEntry(String id) { + try { + removeStatement.setString(1, id); + removeStatement.execute(); + } catch (SQLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + public void put(String id, Programme prog) { if (db == null) return; try { diff --git a/src/main/java/org/vanbest/xmltv/TvGids.java b/src/main/java/org/vanbest/xmltv/TvGids.java index 4c9b52d..bd4d142 100644 --- a/src/main/java/org/vanbest/xmltv/TvGids.java +++ b/src/main/java/org/vanbest/xmltv/TvGids.java @@ -18,6 +18,7 @@ package org.vanbest.xmltv; import java.io.BufferedReader; import java.io.File; +import java.io.FileWriter; import java.io.IOException; import java.io.InputStreamReader; import java.net.MalformedURLException; @@ -34,6 +35,11 @@ import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamWriter; + +import org.apache.commons.lang.StringEscapeUtils; + import net.sf.ezmorph.MorpherRegistry; import net.sf.ezmorph.ObjectMorpher; import net.sf.ezmorph.object.DateMorpher; @@ -48,9 +54,11 @@ public class TvGids extends AbstractEPGSource implements EPGSource { static String programme_base_url="http://www.tvgids.nl/json/lists/programs.php"; static String detail_base_url = "http://www.tvgids.nl/json/lists/program.php"; static String html_detail_base_url = "http://www.tvgids.nl/programma/"; - - static boolean initialised = false; + static boolean initialised = false; + + private static final int MAX_PROGRAMMES_PER_DAY = 9999; + private ProgrammeCache cache; public TvGids(Config config) { @@ -115,6 +123,30 @@ public class TvGids extends AbstractEPGSource implements EPGSource { return new URL(s.toString()); } + List<String> parseKijkwijzer(String s) { + List<String> result = new ArrayList<String>(); + for (int i=0; i<s.length(); i++) { + char c = s.charAt(i); + switch(c) { + case 'a':result.add("Angst"); break; + case 'd':result.add("Discriminatie"); break; + case 's':result.add("Seks"); break; + case 'h':result.add("Drugs/Alcohol"); break; + case 'g':result.add("Geweld"); break; + case 't':result.add("Grof taalgebruik"); break; + case '1':result.add("Voor alle leeftijden"); break; + case '2':result.add("Afgeraden voor kinderen jonger dan 6 jaar"); break; + case '9':result.add("Afgeraden voor kinderen jonger dan 9 jaar"); break; + case '3':result.add("Afgeraden voor kinderen jonger dan 12 jaar"); break; + case '4':result.add("Afgeraden voor kinderen jonger dan 16 jaar"); break; + default: if (!config.quiet) { + System.out.println("Unknown kijkwijzer character: " + c + " in string " + s); + } + } + } + return result; + } + /* (non-Javadoc) * @see org.vanbest.xmltv.EPGSource#getChannels() */ @@ -171,8 +203,8 @@ public class TvGids extends AbstractEPGSource implements EPGSource { * @see org.vanbest.xmltv.EPGSource#getProgrammes(java.util.List, int, boolean) */ //@Override - public Set<Programme> getProgrammes1(List<Channel> channels, int day, boolean fetchDetails) throws Exception { - Set<Programme> result = new HashSet<Programme>(); + public List<Programme> getProgrammes1(List<Channel> channels, int day, boolean fetchDetails) throws Exception { + List<Programme> result = new ArrayList<Programme>(); URL url = programmeUrl(channels, day); JSONObject jsonObject = fetchJSON(url); @@ -182,19 +214,22 @@ public class TvGids extends AbstractEPGSource implements EPGSource { JSON ps = (JSON) jsonObject.get(c.id); if ( ps.isArray() ) { JSONArray programs = (JSONArray) ps; - for( int i=0; i<programs.size(); i++ ) { + for( int i=0; i<programs.size() && i<MAX_PROGRAMMES_PER_DAY; i++ ) { JSONObject programme = programs.getJSONObject(i); Programme p = programmeFromJSON(programme, fetchDetails); p.channel = c.getXmltvChannelId(); - result.add( p ); + result.add(p); } } else { JSONObject programs = (JSONObject) ps; + int count = 0; for( Object o: programs.keySet() ) { + if (count>MAX_PROGRAMMES_PER_DAY) break; JSONObject programme = programs.getJSONObject(o.toString()); Programme p = programmeFromJSON(programme, fetchDetails); p.channel = c.getXmltvChannelId(); - result.add( p ); + result.add(p); + count++; } } } @@ -224,16 +259,29 @@ public class TvGids extends AbstractEPGSource implements EPGSource { if (result == null) { stats.cacheMisses++; result = new Programme(); - // TODO other fields - result.addTitle(programme.getString("title")); } else { stats.cacheHits++; } + System.out.println(" titel:" + programme.getString("titel")); + //System.out.println("datum_start:" + programme.getString("datum_start")); + //System.out.println(" datum_end:" + programme.getString("datum_end")); + //System.out.println(" genre:" + programme.getString("genre")); SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", new Locale("nl")); result.startTime = df.parse(programme.getString("datum_start")); result.endTime = df.parse(programme.getString("datum_end")); - - // p.fixup(config); + result.addTitle(programme.getString("titel")); + String genre = programme.getString("genre"); + if (genre != null && !genre.isEmpty()) result.addCategory(config.translateCategory(genre)); + String kijkwijzer = programme.getString("kijkwijzer"); + if (kijkwijzer!=null && !kijkwijzer.isEmpty()) { + List<String> list = parseKijkwijzer(kijkwijzer); + for(String s: list) { + result.addRating("kijkwijzer", s); + // TODO add icon from HTML detail page + } + + } + // TODO other fields if (fetchDetails && !cached) { fillDetails(id, result); @@ -248,6 +296,25 @@ public class TvGids extends AbstractEPGSource implements EPGSource { return result; } + private void fillDetails(String id, Programme result) throws Exception { + fillJSONDetails(id, result); + fillScraperDetails(id, result); + + if ((result.secondaryTitles==null || result.secondaryTitles.isEmpty()) && + (!result.hasCategory("movies") && !result.hasCategory("film"))) { + for(Programme.Title t: result.titles) { + String[] parts = t.title.split("\\s*:\\s*", 2); + if (parts.length >= 2 ) { + if (!config.quiet) { + System.out.println("Splitting title from \"" + t.title + "\" to: \"" + parts[0].trim() + "\"; sub-title: \"" + parts[1].trim() + "\""); + } + t.title = parts[0].trim(); + result.addSecondaryTitle(parts[1].trim()); + } + } + } + } + /* * {"db_id":"12436404", * "titel":"RTL Boulevard", @@ -262,20 +329,71 @@ public class TvGids extends AbstractEPGSource implements EPGSource { * "regisseur":"", * "zender_id":"4"} */ - private void fillDetails(String id, Programme result) throws Exception { + private void fillJSONDetails(String id, Programme result) throws Exception { + URL url = JSONDetailUrl(id); + JSONObject json = fetchJSON(url); + Set<String> keys = json.keySet(); + for(String key: keys) { + String value = StringEscapeUtils.unescapeHtml(json.getString(key)); + if (value.isEmpty()) continue; + if(key.equals("synop")) { + value = value.replaceAll("<br>", " "). + replaceAll("<br />", " "). + replaceAll("<p>", " "). + replaceAll("</p>", " "). + replaceAll("<strong>", " "). + replaceAll("</strong>", " "). + replaceAll("<em>", " "). + replaceAll("</em>", " "). + trim(); + result.addDescription(value); + } else if (key.equals("presentatie")) { + String[] parts = value.split(","); + for (String s: parts) { + result.addPresenter(s.trim()); + } + } else if (key.equals("acteursnamen_rolverdeling")) { + // TODO hoe zouden rollen kunnen worden aangegeven? Geen voorbeelden van gezien. + String[] parts = value.split(","); + for (String s: parts) { + result.addActor(s.trim()); + } + } else if (key.equals("regisseur")) { + String[] parts = value.split(","); + for (String s: parts) { + result.addDirector(s.trim()); + } + } else if (key.equals("kijkwijzer")) { + // TODO + } else if (key.equals("db_id")) { + // ignore + } else if (key.equals("titel")) { + // ignore + } else if (key.equals("datum")) { + // ignore + } else if (key.equals("btijd")) { + // ignore + } else if (key.equals("etijd")) { + // ignore + } else if (key.equals("genre")) { + // ignore + } else if (key.equals("zender_id")) { + // ignore + } else { + if (!config.quiet) { + System.out.println("Unknown key in tvgids.nl json details: \"" + key + "\""); + } + } + } + } + + private void fillScraperDetails(String id, Programme result) throws Exception { Pattern progInfoPattern = Pattern.compile("prog-info-content.*prog-info-footer", Pattern.DOTALL); Pattern infoLinePattern = Pattern.compile("<li><strong>(.*?):</strong>(.*?)</li>"); Pattern HDPattern = Pattern.compile("HD \\d+[ip]?"); Pattern kijkwijzerPattern = Pattern.compile("<img src=\"http://tvgidsassets.nl/img/kijkwijzer/.*?\" alt=\"(.*?)\" />"); - - URL url = JSONDetailUrl(id); - JSONObject json = fetchJSON(url); - //result.details = (TvGidsProgrammeDetails) JSONObject.toBean(json, TvGidsProgrammeDetails.class); - - //TODO fill result objecy from json object - - url = HTMLDetailUrl(id); + URL url = HTMLDetailUrl(id); String clob=fetchURL(url); //System.out.println("clob:"); //System.out.println(clob); @@ -297,17 +415,17 @@ public class TvGids extends AbstractEPGSource implements EPGSource { if (item.toLowerCase().contains("teletekst")) { result.addSubtitle("teletext"); } else if (item.toLowerCase().contains("breedbeeld")) { - //result.details.breedbeeld = true; + result.setVideoAspect("16:9"); } else if (value.toLowerCase().contains("zwart")) { - //result.details.blacknwhite = true; + result.setVideoColour(false); } else if (value.toLowerCase().contains("stereo")) { - //result.details.stereo = true; + result.setAudioStereo("stereo"); } else if (value.toLowerCase().contains("herhaling")) { - //result.details.herhaling = true; + result.setPreviouslyShown(); } else { Matcher m3 = HDPattern.matcher(value); if (m3.find()) { - //result.details.quality = m3.group(); + result.setVideoQuality(m3.group()); } else { if (!config.quiet) System.out.println(" Unknown value in 'bijzonderheden': " + item); } @@ -327,12 +445,9 @@ public class TvGids extends AbstractEPGSource implements EPGSource { // System.out.println(" kijkwijzer: " + kijkwijzer); } } + } // result.details.fixup(result, config.quiet); -// cache.add(result.db_id, result.details); - } else { - stats.cacheHits++; - } } @Override @@ -342,4 +457,43 @@ public class TvGids extends AbstractEPGSource implements EPGSource { // dummy, wait for superclass and interface to be generalised return null; } + + + /** + * @param args + */ + public static void main(String[] args) { + Config config = Config.getDefaultConfig(); + TvGids gids = new TvGids(config); + try { + List<Channel> channels = gids.getChannels(); + System.out.println("Channels: " + channels); + XMLStreamWriter writer = XMLOutputFactory.newInstance().createXMLStreamWriter(new FileWriter("tvgids.xml")); + writer.writeStartDocument(); + writer.writeCharacters("\n"); + writer.writeDTD("<!DOCTYPE tv SYSTEM \"xmltv.dtd\">"); + writer.writeCharacters("\n"); + writer.writeStartElement("tv"); + //List<Channel> my_channels = channels; + List<Channel> my_channels = channels.subList(0,15); + for(Channel c: my_channels) {c.serialize(writer);} + writer.flush(); + List<Programme> programmes = gids.getProgrammes1(my_channels, 2, true); + for(Programme p: programmes) {p.serialize(writer);} + writer.writeEndElement(); + writer.writeEndDocument(); + writer.flush(); + if (!config.quiet) { + EPGSource.Stats stats = gids.getStats(); + System.out.println("Number of programmes from cache: " + stats.cacheHits); + System.out.println("Number of programmes fetched: " + stats.cacheMisses); + System.out.println("Number of fetch errors: " + stats.fetchErrors); + } + gids.close(); + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + }