[Yanel-commits] rev 50380 - public/yanel/trunk/src/resources/xml/src/java/org/wyona/yanel/impl/resources

michi at wyona.com michi at wyona.com
Sun Jun 13 20:32:08 CEST 2010


Author: michi
Date: 2010-06-13 20:32:08 +0200 (Sun, 13 Jun 2010)
New Revision: 50380

Modified:
   public/yanel/trunk/src/resources/xml/src/java/org/wyona/yanel/impl/resources/XMLResource.java
Log:
tidy added

Modified: public/yanel/trunk/src/resources/xml/src/java/org/wyona/yanel/impl/resources/XMLResource.java
===================================================================
--- public/yanel/trunk/src/resources/xml/src/java/org/wyona/yanel/impl/resources/XMLResource.java	2010-06-13 17:50:25 UTC (rev 50379)
+++ public/yanel/trunk/src/resources/xml/src/java/org/wyona/yanel/impl/resources/XMLResource.java	2010-06-13 18:32:08 UTC (rev 50380)
@@ -91,15 +91,22 @@
             if (log.isDebugEnabled()) log.debug("Yanel Path: " + yanelPath);
             if (yanelPath.startsWith("yanelrepo:") || yanelPath.startsWith("yanelresource:") || yanelPath.startsWith("http:")) {
                 log.debug("Protocol/Scheme used: " + yanelPath);
-                SourceResolver resolver = new SourceResolver(this);
-                Source source = resolver.resolve(yanelPath, null);
-                InputStream in;
+                // TODO: URL Re-writing (see for example http://j2ep.sourceforge.net/docs/rewrite.html)
                 try {
-                    in = org.wyona.commons.xml.XMLHelper.isWellFormed(((javax.xml.transform.stream.StreamSource) source).getInputStream());
-                    return in;
+                    SourceResolver resolver = new SourceResolver(this);
+                    Source source = resolver.resolve(yanelPath, null);
+                    return org.wyona.commons.xml.XMLHelper.isWellFormed(((javax.xml.transform.stream.StreamSource) source).getInputStream());
                 } catch(Exception e) {
-                    StringBuilder sb = new StringBuilder("<exception>Date retrieved from '" + yanelPath + "' not well-formed!</exception>");
+                    String exceptionMessage = "Data retrieved from '" + yanelPath + "' not well-formed!";
+                    log.warn(exceptionMessage);
+/*
+                    StringBuilder sb = new StringBuilder("<exception>" + exceptionMessage + "</exception>");
                     return new java.io.ByteArrayInputStream(sb.toString().getBytes());
+*/
+                    SourceResolver resolver = new SourceResolver(this);
+                    Source source = resolver.resolve(yanelPath, null);
+                    return tidy(((javax.xml.transform.stream.StreamSource) source).getInputStream());
+                    //return tidy(intercept(((javax.xml.transform.stream.StreamSource) source).getInputStream()));
                 }
             } else {
                 log.info("No protocol used.");
@@ -626,4 +633,46 @@
     public String getWorkflowIntrospection() throws WorkflowException {
         return WorkflowHelper.getWorkflowIntrospection(this);
     }
+
+    /**
+     * Tidy HTML: parses the given stream with org.w3c.tidy.Tidy (configured via setXHTML(true) and setNumEntities(true)), buffers the output in memory and closes the given stream after parsing (it is not closed if parsing throws)
+     * @return in-memory stream over the Tidy output (intended to be well-formed XHTML)
+     */
+    private InputStream tidy(InputStream in) throws Exception {
+        log.warn("Tidy HTML ...");
+        org.w3c.tidy.Tidy tidy = new org.w3c.tidy.Tidy();
+        tidy.setXHTML(true);
+        tidy.setNumEntities(true);
+/*
+        tidy.setTidyMark(false);
+        tidy.setInputEncoding("utf-8");
+        tidy.setOutputEncoding("utf-8");
+*/
+
+        java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream();
+        tidy.parse(in, out);
+        in.close();
+        return new java.io.ByteArrayInputStream(out.toByteArray());
+        //return intercept(new java.io.ByteArrayInputStream(out.toByteArray()));
+    }
+
+    /**
+     * Intercept InputStream and log content: reads the given stream to its end into memory, logs the buffered content at WARN level and returns a new in-memory stream over the same bytes (the given stream is not closed here)
+     */
+    private InputStream intercept(InputStream in) throws java.io.IOException {
+        java.io.ByteArrayOutputStream baos  = new java.io.ByteArrayOutputStream();
+        byte[] buf = new byte[8192];
+        int bytesR;
+        while ((bytesR = in.read(buf)) != -1) {
+            baos.write(buf, 0, bytesR);
+        }
+
+        // Buffer within memory (TODO: Maybe replace with File-buffering ...)
+        // http://www-128.ibm.com/developerworks/java/library/j-io1/
+        byte[] memBuffer = baos.toByteArray();
+
+        log.warn("DEBUG: InputStream: " + baos);
+
+        return new java.io.ByteArrayInputStream(memBuffer);
+    }
 }



More information about the Yanel-commits mailing list