[Yanel-commits] rev 50380 -
public/yanel/trunk/src/resources/xml/src/java/org/wyona/yanel/impl/resources
michi at wyona.com
michi at wyona.com
Sun Jun 13 20:32:08 CEST 2010
Author: michi
Date: 2010-06-13 20:32:08 +0200 (Sun, 13 Jun 2010)
New Revision: 50380
Modified:
public/yanel/trunk/src/resources/xml/src/java/org/wyona/yanel/impl/resources/XMLResource.java
Log:
tidy added
Modified: public/yanel/trunk/src/resources/xml/src/java/org/wyona/yanel/impl/resources/XMLResource.java
===================================================================
--- public/yanel/trunk/src/resources/xml/src/java/org/wyona/yanel/impl/resources/XMLResource.java 2010-06-13 17:50:25 UTC (rev 50379)
+++ public/yanel/trunk/src/resources/xml/src/java/org/wyona/yanel/impl/resources/XMLResource.java 2010-06-13 18:32:08 UTC (rev 50380)
@@ -91,15 +91,22 @@
if (log.isDebugEnabled()) log.debug("Yanel Path: " + yanelPath);
if (yanelPath.startsWith("yanelrepo:") || yanelPath.startsWith("yanelresource:") || yanelPath.startsWith("http:")) {
log.debug("Protocol/Scheme used: " + yanelPath);
- SourceResolver resolver = new SourceResolver(this);
- Source source = resolver.resolve(yanelPath, null);
- InputStream in;
+ // TODO: URL Re-writing (see for example http://j2ep.sourceforge.net/docs/rewrite.html)
try {
- in = org.wyona.commons.xml.XMLHelper.isWellFormed(((javax.xml.transform.stream.StreamSource) source).getInputStream());
- return in;
+ SourceResolver resolver = new SourceResolver(this);
+ Source source = resolver.resolve(yanelPath, null);
+ return org.wyona.commons.xml.XMLHelper.isWellFormed(((javax.xml.transform.stream.StreamSource) source).getInputStream());
} catch(Exception e) {
- StringBuilder sb = new StringBuilder("<exception>Date retrieved from '" + yanelPath + "' not well-formed!</exception>");
+ String exceptionMessage = "Data retrieved from '" + yanelPath + "' not well-formed!";
+ log.warn(exceptionMessage);
+/*
+ StringBuilder sb = new StringBuilder("<exception>" + exceptionMessage + "</exception>");
return new java.io.ByteArrayInputStream(sb.toString().getBytes());
+*/
+ SourceResolver resolver = new SourceResolver(this);
+ Source source = resolver.resolve(yanelPath, null);
+ return tidy(((javax.xml.transform.stream.StreamSource) source).getInputStream());
+ //return tidy(intercept(((javax.xml.transform.stream.StreamSource) source).getInputStream()));
}
} else {
log.info("No protocol used.");
@@ -626,4 +633,46 @@
public String getWorkflowIntrospection() throws WorkflowException {
return WorkflowHelper.getWorkflowIntrospection(this);
}
+
+ /**
+ * Tidy HTML: runs JTidy over the given input stream with the XHTML and numeric-entities options enabled, buffers the output in memory, and closes the input stream after parsing (the close is not in a finally block, so the stream stays open if parsing throws)
+ * @return in-memory stream over the JTidy output (XHTML output is enabled, so this is intended to be well-formed XHTML)
+ */
+ private InputStream tidy(InputStream in) throws Exception {
+ log.warn("Tidy HTML ...");
+ org.w3c.tidy.Tidy tidy = new org.w3c.tidy.Tidy();
+ tidy.setXHTML(true);
+ tidy.setNumEntities(true);
+/*
+ tidy.setTidyMark(false);
+ tidy.setInputEncoding("utf-8");
+ tidy.setOutputEncoding("utf-8");
+*/
+
+ java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream();
+ tidy.parse(in, out);
+ in.close();
+ return new java.io.ByteArrayInputStream(out.toByteArray());
+ //return intercept(new java.io.ByteArrayInputStream(out.toByteArray()));
+ }
+
+ /**
+ * Intercept InputStream and log content: reads the given stream to its end into a memory buffer, logs the buffered content at WARN level (via the buffer's toString()), and returns a new in-memory stream over the same bytes; the given stream is not closed here
+ */
+ private InputStream intercept(InputStream in) throws java.io.IOException {
+ java.io.ByteArrayOutputStream baos = new java.io.ByteArrayOutputStream();
+ byte[] buf = new byte[8192];
+ int bytesR;
+ while ((bytesR = in.read(buf)) != -1) {
+ baos.write(buf, 0, bytesR);
+ }
+
+ // Buffer within memory (TODO: Maybe replace with File-buffering ...)
+ // http://www-128.ibm.com/developerworks/java/library/j-io1/
+ byte[] memBuffer = baos.toByteArray();
+
+ log.warn("DEBUG: InputStream: " + baos);
+
+ return new java.io.ByteArrayInputStream(memBuffer);
+ }
}
More information about the Yanel-commits
mailing list