153 lines
5.1 KiB
Java
153 lines
5.1 KiB
Java
package net.yacy.document.parser.rdfa.impl;
|
|
|
|
import java.io.BufferedReader;
|
|
import java.io.File;
|
|
import java.io.IOException;
|
|
import java.io.Reader;
|
|
import java.util.ArrayList;
|
|
import java.util.Locale;
|
|
|
|
import javax.xml.transform.Templates;
|
|
import javax.xml.transform.Transformer;
|
|
import javax.xml.transform.TransformerConfigurationException;
|
|
import javax.xml.transform.TransformerException;
|
|
import javax.xml.transform.TransformerFactory;
|
|
import javax.xml.transform.stream.StreamResult;
|
|
import javax.xml.transform.stream.StreamSource;
|
|
import net.yacy.cora.util.ConcurrentLog;
|
|
import net.yacy.document.parser.rdfa.IRDFaTriple;
|
|
import net.yacy.search.Switchboard;
|
|
|
|
public class RDFaTripleImpl{
|
|
|
|
private static Templates templates = null;
|
|
private String propertyURI = null;
|
|
private String subjectURI = null;
|
|
private String subjectNodeURI = null;
|
|
private String objectURI = null;
|
|
private String objectNodeURI = null;
|
|
private String value = null;
|
|
private String dataType = null;
|
|
private String language = null;
|
|
private final Reader in;
|
|
private final Transformer aTransformer;
|
|
private final ArrayList<IRDFaTriple> allRDFaTriples = new ArrayList<IRDFaTriple>();
|
|
|
|
|
|
public RDFaTripleImpl(Reader in, String base) throws IOException,
|
|
TransformerException, TransformerConfigurationException {
|
|
|
|
BufferedReader bufReader = new BufferedReader(in);
|
|
bufReader.mark(2048); // mark position for following reset
|
|
String readLine = bufReader.readLine();
|
|
if (!readLine.toLowerCase(Locale.ROOT).contains("<!doctype")){
|
|
bufReader.reset();
|
|
}
|
|
|
|
if (templates == null) {
|
|
File f = new File(Switchboard.getSwitchboard() != null ? Switchboard.getSwitchboard().appPath : null, "defaults" + File.separatorChar + "RDFaParser.xsl");
|
|
try {
|
|
StreamSource aSource = new StreamSource(f);
|
|
TransformerFactory aFactory = TransformerFactory.newInstance();
|
|
templates = aFactory.newTemplates(aSource);
|
|
} catch(Exception e){
|
|
ConcurrentLog.severe("RDFA PARSER", "XSL template could not be loaded from " + f.toString());
|
|
}
|
|
}
|
|
this.aTransformer = templates.newTransformer();
|
|
this.aTransformer.setParameter("parser", this);
|
|
this.aTransformer.setParameter("url", base);
|
|
|
|
this.in = bufReader;
|
|
}
|
|
|
|
public IRDFaTriple[] parse() {
|
|
try {
|
|
this.aTransformer.transform(new StreamSource(this.in), new StreamResult(System.out));
|
|
} catch (final TransformerException e) {
|
|
ConcurrentLog.warn("RDFA PARSER", "Error while reading RDFa");
|
|
// e.printStackTrace();
|
|
}
|
|
|
|
return this.allRDFaTriples .toArray(new IRDFaTriple[]{});
|
|
|
|
}
|
|
|
|
public static boolean flushDataProperty(Object oparser) {
|
|
RDFaTripleImpl parser = ((RDFaTripleImpl)oparser);
|
|
|
|
parser.reportDataProperty(parser.subjectURI, parser.subjectNodeURI, parser.propertyURI,
|
|
parser.value, parser.dataType, parser.language, parser.objectNodeURI, parser.objectURI);
|
|
nullAllValues(parser);
|
|
return true;
|
|
}
|
|
|
|
private void reportDataProperty(String subjectURI, String subjectNodeURI,
|
|
String propertyURI, String value, String dataType,
|
|
String language, String objectNodeURI, String objectURI) {
|
|
IRDFaTriple triple = new RDFaTripleContent(subjectURI,subjectNodeURI,propertyURI,value,dataType,language, objectNodeURI,objectURI);
|
|
this.allRDFaTriples.add(triple);
|
|
}
|
|
|
|
private static void nullAllValues(RDFaTripleImpl parser) {
|
|
parser.propertyURI = null;
|
|
parser.subjectURI = null;
|
|
parser.subjectNodeURI = null;
|
|
parser.objectURI = null;
|
|
parser.objectNodeURI = null;
|
|
parser.value = null;
|
|
parser.dataType = null;
|
|
parser.language = null;
|
|
}
|
|
|
|
public static boolean flushObjectProperty(Object oparser) {
|
|
RDFaTripleImpl parser = ((RDFaTripleImpl)oparser);
|
|
// System.out.println("parser added");
|
|
nullAllValues(parser);
|
|
return true;
|
|
}
|
|
|
|
public static boolean setTheDatatype(Object parser, String theDatatype) {
|
|
((RDFaTripleImpl)parser).dataType = theDatatype;
|
|
System.out.println(theDatatype);
|
|
return true;
|
|
}
|
|
|
|
public static boolean setTheLanguage(Object parser, String theLanguage) {
|
|
((RDFaTripleImpl)parser).language = theLanguage;
|
|
return true;
|
|
}
|
|
|
|
public static boolean setTheObjectNodeID(Object parser, String theObjectNodeID) {
|
|
((RDFaTripleImpl)parser).objectNodeURI = theObjectNodeID;
|
|
return true;
|
|
}
|
|
|
|
public static boolean setTheObjectURI(Object parser, String theObjectURI) {
|
|
((RDFaTripleImpl)parser).objectURI = theObjectURI;
|
|
return true;
|
|
}
|
|
|
|
public static boolean setThePropertyURI(Object parser, String thePropertyURI) {
|
|
((RDFaTripleImpl)parser).propertyURI = thePropertyURI;
|
|
return true;
|
|
}
|
|
|
|
|
|
public static boolean setTheSubjectNodeID(Object parser, String theSubjectNodeID) {
|
|
((RDFaTripleImpl)parser).subjectNodeURI = theSubjectNodeID;
|
|
System.out.println(theSubjectNodeID);
|
|
return true;
|
|
}
|
|
|
|
public static boolean setTheSubjectURI(Object parser, String theSubjectURI) {
|
|
((RDFaTripleImpl)parser).subjectURI = theSubjectURI;
|
|
return true;
|
|
}
|
|
|
|
public static boolean setTheValue(Object parser, String theValue) {
|
|
((RDFaTripleImpl)parser).value = theValue;
|
|
return true;
|
|
}
|
|
}
|