...
 
Commits (7)
......@@ -19,6 +19,7 @@ Services
* `acrepo-jsonld-service`: This service creates expanded or compact JSON-LD representations of input documents
* `acrepo-mint-service`: This mints random (public) URIs for use with fedora resources
* `acrepo-xml-metadata`: This service translates Fedora RDF documents into MODS/XML or DC/XML
* `acrepo-import-service`: This service imports data from a zip/tgz file into Fedora4.
Building
--------
......@@ -46,6 +47,7 @@ command from its shell:
feature:install acrepo-jsonld-service
feature:install acrepo-mint-service
feature:install acrepo-xml-metadata
feature:install acrepo-import-service
Or by copying any of the compiled bundles into `$KARAF_HOME/deploy`.
......
Import/Ingest Service
=====================
This application implements batch imports for Fedora4 objects.
Building
--------
To build this project, run:
mvn install
Deploying in OSGi
-----------------
This project can be deployed in an OSGi container such as
[Apache ServiceMix](http://servicemix.apache.org/) or
[Apache Karaf](http://karaf.apache.org). Run the following
commands from its shell:
feature:repo-add mvn:edu.amherst.acdc/acrepo-karaf/LATEST/xml/features
feature:install acrepo-import-service
Configuration
-------------
This application can be configured by creating a file in
`$KARAF_HOME/etc/edu.amherst.acdc.importer.cfg`. The following
values are available for configuration:
In the event of failure, the maximum number of times a re-delivery will be attempted.
error.maxRedeliveries=10
If the fedora repository requires authentication, the following values
can be set:
fcrepo.authUsername=<username>
fcrepo.authPassword=<password>
fcrepo.authHost=<host realm>
The baseUrl for the fedora repository.
fcrepo.baseUrl=localhost:8080/fcrepo/rest
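The directory where zip/tgz files will be placed for ingest:
ingest.dir=/tmp/fedora/dropbox
The working directory where the data will be unpacked and worked on:
working.dir=/tmp/fedora/working
The script that will ingest the data:
ingest.script=/tmp/fedora/script.py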
When this file is edited, any currently running routes are immediately redeployed
with the new values.
For more help see the Apache Camel documentation
http://camel.apache.org/
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<modelVersion>4.0.0</modelVersion>

<!-- Parent POM -->
<parent>
  <groupId>edu.amherst.acdc</groupId>
  <artifactId>repository-services</artifactId>
  <version>1.0.1-SNAPSHOT</version>
</parent>

<!-- Coordinates and packaging of this module -->
<artifactId>acrepo-import-service</artifactId>
<packaging>bundle</packaging>

<name>Object Ingest Service</name>
<description>Repository import service, OSGi blueprint bundle project.</description>

<properties>
  <!-- NOTE(review): presumably read by the bundle plugin configuration of the parent POM; confirm -->
  <osgi.export.packages>edu.amherst.acdc.importer;version=${project.version}</osgi.export.packages>
</properties>
<dependencies>
<!-- Camel runtime and blueprint support (no versions are declared here) -->
<dependency>
  <groupId>org.apache.camel</groupId>
  <artifactId>camel-core</artifactId>
</dependency>
<dependency>
  <groupId>org.apache.camel</groupId>
  <artifactId>camel-blueprint</artifactId>
</dependency>

<!-- Fedora component for Camel -->
<dependency>
  <groupId>org.fcrepo.camel</groupId>
  <artifactId>fcrepo-camel</artifactId>
</dependency>

<!-- Camel exec component -->
<dependency>
  <groupId>org.apache.camel</groupId>
  <artifactId>camel-exec</artifactId>
</dependency>
<!--
<dependency>
<groupId>org.apache.camel</groupId>
<artifactId>camel-spring</artifactId>
</dependency> -->
<!-- Camel Jetty 9 component -->
<dependency>
  <groupId>org.apache.camel</groupId>
  <artifactId>camel-jetty9</artifactId>
</dependency>
<!--
<dependency>
<groupId>org.apache.activemq</groupId>
<artifactId>activemq-camel</artifactId>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-context</artifactId>
</dependency>
-->
<!-- Logging: SLF4J API plus the Logback core and classic modules -->
<dependency>
  <groupId>org.slf4j</groupId>
  <artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
  <groupId>ch.qos.logback</groupId>
  <artifactId>logback-core</artifactId>
</dependency>
<dependency>
  <groupId>ch.qos.logback</groupId>
  <artifactId>logback-classic</artifactId>
</dependency>
<!-- Testing & Camel Plugin -->
<!--
<dependency>
<groupId>org.apache.camel</groupId>
<artifactId>camel-test-blueprint</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<scope>test</scope>
</dependency>
-->
<!--
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
</dependency>
<dependency>
<groupId>com.jayway.awaitility</groupId>
<artifactId>awaitility</artifactId>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-all</artifactId>
</dependency>
<dependency>
<groupId>org.apache.jena</groupId>
<artifactId>jena-fuseki</artifactId>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.fcrepo.client</groupId>
<artifactId>fcrepo-java-client</artifactId>
<scope>test</scope>
</dependency>
-->
</dependencies>
<build>
<defaultGoal>install</defaultGoal>
<plugins>
<!-- Standard Maven plugins; no configuration is declared for them in this module -->
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-compiler-plugin</artifactId>
</plugin>
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-resources-plugin</artifactId>
</plugin>
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-checkstyle-plugin</artifactId>
</plugin>
<!--
  build-helper-maven-plugin:
  1. Declares the port property names intended for integration testing.
     NOTE(review): no execution that reserves these ports is declared in this
     block; confirm whether one is inherited before relying on the properties.
  2. Attaches the default configuration file (type "cfg", classifier
     "configuration") during the package phase for OSGi deployment.
-->
<plugin>
  <groupId>org.codehaus.mojo</groupId>
  <artifactId>build-helper-maven-plugin</artifactId>
  <configuration>
    <portNames>
      <portName>fcrepo.dynamic.test.port</portName>
      <portName>fcrepo.dynamic.jms.port</portName>
      <portName>fcrepo.dynamic.stomp.port</portName>
      <portName>jetty.dynamic.stop.port</portName>
    </portNames>
  </configuration>
  <executions>
    <execution>
      <id>attach-artifacts</id>
      <phase>package</phase>
      <goals>
        <goal>attach-artifact</goal>
      </goals>
      <configuration>
        <artifacts>
          <artifact>
            <file>src/main/cfg/edu.amherst.acdc.importer.cfg</file>
            <type>cfg</type>
            <classifier>configuration</classifier>
          </artifact>
        </artifacts>
      </configuration>
    </execution>
  </executions>
</plugin>
<!-- integration testing
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-failsafe-plugin</artifactId>
<configuration>
<argLine>${jacoco.agent.it.arg}</argLine>
<systemPropertyVariables>
<fcrepo.dynamic.jms.port>${fcrepo.dynamic.jms.port}</fcrepo.dynamic.jms.port>
<fcrepo.dynamic.stomp.port>${fcrepo.dynamic.stomp.port}</fcrepo.dynamic.stomp.port>
<fcrepo.dynamic.test.port>${fcrepo.dynamic.test.port}</fcrepo.dynamic.test.port>
<jetty.dynamic.test.port>${jetty.dynamic.test.port}</jetty.dynamic.test.port>
</systemPropertyVariables>
</configuration>
</plugin>
-->
<!-- Launch jetty for integration testing with Fedora
<plugin>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-maven-plugin</artifactId>
<configuration>
<systemProperties>
<force>true</force>
<systemProperty>
<name>fcrepo.home</name>
<value>${project.build.directory}/fcrepo-data</value>
</systemProperty>
<systemProperty>
<name>fcrepo.dynamic.jms.port</name>
<value>${fcrepo.dynamic.jms.port}</value>
</systemProperty>
<systemProperty>
<name>fcrepo.dynamic.stomp.port</name>
<value>${fcrepo.dynamic.stomp.port}</value>
</systemProperty>
</systemProperties>
<scanIntervalSeconds>10</scanIntervalSeconds>
<stopKey>STOP</stopKey>
<stopPort>${jetty.dynamic.stop.port}</stopPort>
<daemon>true</daemon>
</configuration>
<executions>
<execution>
<id>start-jetty</id>
<phase>pre-integration-test</phase>
<goals>
<goal>stop</goal>
<goal>start</goal>
</goals>
<configuration>
<httpConnector>
<port>${fcrepo.dynamic.test.port}</port>
</httpConnector>
<scanIntervalSeconds>0</scanIntervalSeconds>
<daemon>true</daemon>
<contextHandlers>
<contextHandler implementation="org.eclipse.jetty.webapp.WebAppContext">
<war>${project.build.directory}/fcrepo.war</war>
<contextPath>/fcrepo</contextPath>
</contextHandler>
</contextHandlers>
</configuration>
</execution>
<execution>
<id>stop-jetty</id>
<phase>post-integration-test</phase>
<goals>
<goal>stop</goal>
</goals>
</execution>
</executions>
</plugin>
-->
<!--Bring in fcrepo webapp for integration testing
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>${dependency.plugin.version}</version>
<executions>
<execution>
<id>copy</id>
<phase>package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<artifactItems>
<artifactItem>
<groupId>org.fcrepo</groupId>
<artifactId>fcrepo-webapp</artifactId>
<version>${fcrepo.version}</version>
<type>war</type>
<outputDirectory>${project.build.directory}</outputDirectory>
<destFileName>fcrepo.war</destFileName>
</artifactItem>
</artifactItems>
</configuration>
</execution>
</executions>
</plugin>
-->
<!-- Generates the MANIFEST file of the OSGi bundle -->
<plugin>
  <groupId>org.apache.felix</groupId>
  <artifactId>maven-bundle-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
# Maximum number of times a redelivery will be attempted after a failure.
error.maxRedeliveries=10
# Credentials for the Fedora repository. These can be left empty unless the
# repository requires authentication.
fcrepo.authUsername=
fcrepo.authPassword=
fcrepo.authHost=
# The baseUrl for the Fedora repository.
fcrepo.baseUrl=localhost:8080/fcrepo/rest
# The JMS connection URI, used for connecting to a local or remote ActiveMQ broker.
#jms.brokerUrl=tcp://localhost:61616
# The Camel URI for the incoming message stream.
# TODO - not sure this will be used...?
#input.stream=activemq:topic:fedora
# The script that will ingest the data.
# NOTE(review): the blueprint's built-in default for this key is
# /tmp/fedora/fedora-ingest.py; confirm which path is intended.
ingest.script=/tmp/fedora/script.py
# The directory where zip/tar.gz files should be placed for ingest.
ingest.dir=/tmp/fedora/dropbox
# A working directory where the data will be unpacked and worked on.
working.dir=/tmp/fedora/working
# The location of the internal Audit trail if using the fcrepo-audit extension module.
# Nodes at this location will not be serialized.
#audit.container=/audit
<?xml version="1.0" encoding="UTF-8"?>
<!--
  OSGi Blueprint descriptor for the import service.

  Declares the configurable properties (persistent id edu.amherst.acdc.importer),
  the fcrepo Camel component, and two Camel routes:
    AcrepoMover    picks up bundles dropped into ingest.dir and relocates them
                   into working.dir before handing them on.
    AcrepoImporter runs the ingest script on the relocated bundle.
-->
<blueprint xmlns="http://www.osgi.org/xmlns/blueprint/v1.0.0"
           xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
           xmlns:cm="http://aries.apache.org/blueprint/xmlns/blueprint-cm/v1.1.0"
           xsi:schemaLocation="
             http://aries.apache.org/blueprint/xmlns/blueprint-cm/v1.1.0 http://aries.apache.org/schemas/blueprint-cm/blueprint-cm-1.1.0.xsd
             http://www.osgi.org/xmlns/blueprint/v1.0.0 http://www.osgi.org/xmlns/blueprint/v1.0.0/blueprint.xsd
             http://camel.apache.org/schema/blueprint http://camel.apache.org/schema/blueprint/camel-blueprint.xsd">

  <!-- OSGi blueprint property placeholder; update-strategy="reload" restarts the context when the cfg changes -->
  <cm:property-placeholder id="properties" persistent-id="edu.amherst.acdc.importer" update-strategy="reload">
    <cm:default-properties>
      <cm:property name="error.maxRedeliveries" value="10"/>
      <cm:property name="fcrepo.authUsername" value=""/>
      <cm:property name="fcrepo.authPassword" value=""/>
      <cm:property name="fcrepo.authHost" value=""/>
      <cm:property name="fcrepo.baseUrl" value="localhost:8080/fcrepo/rest"/>
      <!--<cm:property name="jms.brokerUrl" value="tcp://localhost:61616"/> -->
      <!-- <cm:property name="input.stream" value="activemq:topic:fedora"/> -->
      <cm:property name="ingest.dir" value="/tmp/fedora/dropbox/"/>
      <cm:property name="working.dir" value="/tmp/fedora/working/"/>
      <cm:property name="ingest.script" value="/tmp/fedora/fedora-ingest.py"/>
      <!--<cm:property name="audit.container" value="/audit"/> -->
    </cm:default-properties>
  </cm:property-placeholder>

  <!-- component-wide configuration of fcrepo -->
  <bean id="fcrepo" class="org.fcrepo.camel.FcrepoComponent">
    <property name="authUsername" value="${fcrepo.authUsername}"/>
    <property name="authPassword" value="${fcrepo.authPassword}"/>
    <property name="authHost" value="${fcrepo.authHost}"/>
  </bean>

  <!-- configuration of the activemq component -->
  <!--
  <bean id="activemq" class="org.apache.activemq.camel.component.ActiveMQComponent">
    <property name="brokerURL" value="${jms.brokerUrl}"/>
  </bean>
  -->

  <camelContext xmlns="http://camel.apache.org/schema/blueprint">
    <!--<package>edu.amherst.acdc.importer</package> -->

    <route id="AcrepoMover">
      <!--
        preMove relocates the bundle into working.dir BEFORE the route body runs.
        The previous "move" option only relocates the file after the exchange
        has completed, so the bundle was not yet in working.dir when the
        importer started.
      -->
      <from uri="file:{{ingest.dir}}?preMove={{working.dir}}&amp;readLock=changed"/>
      <log message="Received and moved ingest bundle"/>
      <!--
        Synchronous hand-off: the file exchange stays open until the script has
        finished, so the consumer's post-processing of the file cannot race
        with the script reading it.
      -->
      <to uri="direct:importer"/>
    </route>

    <route id="AcrepoImporter">
      <from uri="direct:importer"/>
      <log message="Got ingest bundle!"/>
      <!-- The script receives the bundle's file name as its only argument -->
      <setHeader headerName="CamelExecCommandArgs">
        <simple>${header.CamelFileName}</simple>
      </setHeader>
      <!--
        Run the script inside working.dir so the bare file name resolves to the
        relocated bundle and the archive is unpacked there.
      -->
      <to uri="exec:{{ingest.script}}?workingDir={{working.dir}}"/>
      <!-- <to uri="language:python:classpath:fedora-ingest.py"/> -->
      <!-- no <to /> necessary - default is .camel -->
    </route>
  </camelContext>

</blueprint>
#!/usr/bin/python
# This is the main script - it does the majority of the work.
from __future__ import absolute_import, print_function
import sys
import requests
import os
import subprocess
# Announce the invocation together with the raw command-line arguments.
print("Hello there, I received a request to do an import. Args: {0}".format(sys.argv))
##############################################################
# Helpers
##############################################################
# opens a zip/tgz file and returns the directory location
def unpack_file(filename):
    """Unpack an ingest bundle and return the directory it was extracted to.

    The archive type is chosen from the file name: names ending in ``tgz`` or
    ``.tar.gz`` go to ``unpack_tar``, names ending in ``.zip`` go to
    ``unpack_zip``. ``.gzip`` is recognised but not implemented.

    Returns the directory name reported by the helper, or ``''`` when the
    file type is unsupported or unpacking failed.
    """
    print("Opening zip file " + filename)

    # Start from "failed" so that branches which do not unpack anything
    # (unsupported or unknown types) fall through to the error path instead
    # of leaving retval unbound.
    retval = -1
    dirname = ''

    if filename.endswith(('tgz', '.tar.gz')):
        retval, dirname = unpack_tar(filename)
    elif filename.endswith('.zip'):
        retval, dirname = unpack_zip(filename)
    elif filename.endswith('.gzip'):
        print('Not implemented yet')
    else:
        print('Unknown filetype passed in')

    if retval != 0:
        print('Unable to unpack file \'{0}\''.format(filename))
        return ''
    return dirname
def unpack_zip(filename):
    """Validate and extract a zip archive into the current working directory.

    Runs ``zipinfo -1`` to check the archive and list its entries, then
    ``unzip`` to extract it.

    Returns a ``(retval, dirname)`` tuple. ``retval`` is 0 on success, the
    exit code of the failing tool otherwise, or -1 when a tool could not be
    started. ``dirname`` is the top-level path component of the first entry
    in the archive ('' when the listing failed).
    """
    # first test and get info from the file
    args = ['zipinfo', '-1', filename]
    try:
        # universal_newlines=True yields text (not bytes) on Python 2 and 3.
        process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                   universal_newlines=True)
        listing, unused_err = process.communicate()
    except OSError as err:
        # The tool is missing or not executable: report failure, don't crash.
        print('Unable to run \'{0}\': {1}'.format(args[0], err))
        return (-1, '')
    retval = process.returncode
    print('retval after zipinfo call is {0}'.format(retval))

    dirname = ''
    # now actually unpack it.
    if retval == 0:
        # The listing has one entry per line; the extraction directory is the
        # first path component of the first entry (ignoring a leading "./").
        first_entry = listing.split('\n', 1)[0].strip()
        components = [part for part in first_entry.split('/') if part not in ('', '.')]
        dirname = components[0] if components else '.'

        # NOTE(review): unzip may ask before overwriting existing files;
        # confirm the working directory is always clean for each bundle.
        args = ['unzip', filename]
        try:
            process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                       universal_newlines=True)
            _, unused_err = process.communicate()
        except OSError as err:
            print('Unable to run \'{0}\': {1}'.format(args[0], err))
            return (-1, dirname)
        retval = process.returncode
        if retval == 0:
            print('Successfully extracted files from zip file')
        else:
            print('Unable to extract files from zip \'{0}\': {1}'.format(filename, retval))
        print('dirname from {0} is: {1}'.format(args[0], dirname))

    if retval != 0:
        # args[0] names whichever tool ran last, i.e. the one that failed.
        print('\'{0}\' failed for file {1} with error code {2}'.format(args[0], filename, retval))
    return (retval, dirname)
def unpack_tar(filename):
    """Validate and extract a gzipped tarball into the current working directory.

    Runs ``tar -tzf`` to check the archive and list its entries, then
    ``tar -xvf`` to extract it.

    Returns a ``(retval, dirname)`` tuple. ``retval`` is 0 on success, the
    exit code of the failing ``tar`` call otherwise, or -1 when ``tar`` could
    not be started. ``dirname`` is the top-level path component of the first
    entry in the archive ('' when the listing failed).
    """
    #check the file first and get dirname
    args = ['tar', '-tzf', filename]
    try:
        # universal_newlines=True yields text (not bytes) on Python 2 and 3.
        process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                   universal_newlines=True)
        listing, unused_err = process.communicate()
    except OSError as err:
        # tar is missing or not executable: report failure, don't crash.
        print('Unable to run \'{0}\': {1}'.format(args[0], err))
        return (-1, '')
    retval = process.returncode
    print('retval after tar call is {0}'.format(retval))

    dirname = ''
    # now actually unpack it
    if retval == 0:
        print('dir is {0}'.format(listing))
        # The listing has one entry per line; the extraction directory is the
        # first path component of the first entry (ignoring a leading "./").
        first_entry = listing.split('\n', 1)[0].strip()
        components = [part for part in first_entry.split('/') if part not in ('', '.')]
        dirname = components[0] if components else '.'

        args = ['tar', '-xvf', filename]
        try:
            process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                       universal_newlines=True)
            _, unused_err = process.communicate()
        except OSError as err:
            print('Unable to run \'{0}\': {1}'.format(args[0], err))
            return (-1, dirname)
        retval = process.returncode
        if retval == 0:
            print('Successfully extracted files from tgz file')
        else:
            print('Unable to extract files from tgz \'{0}\': {1}'.format(filename, retval))
        print('dir is {0}'.format(dirname))
    return (retval, dirname)
##############################################################
# End helpers
##############################################################
def ingest_data(location):
    """Placeholder for the ingest step.

    Currently only reports that it does nothing; ``location`` is accepted
    but not used.
    """
    message = "ingest_data: noop"
    print(message)
if __name__ == '__main__':
    # The bundle to import is the single required argument. Check the length
    # of argv: indexing a missing element raises IndexError, it is never None.
    if len(sys.argv) < 2:
        print("Usage: {0} <bundle.zip|bundle.tgz>".format(sys.argv[0]))
        sys.exit(1)
    theFile = sys.argv[1]
    # unpack file into directory
    directory = unpack_file(theFile)
    if not directory:
        # unpack_file has already reported the reason; signal the failure to
        # the caller through the exit status.
        sys.exit(1)
    print("directory that houses unpackaged files: " + directory)
    # start to pick up files ...
......@@ -93,6 +93,7 @@
<feature version="${camel.version}">camel-http4</feature>
<feature version="${activemq.version}">activemq-camel</feature>
<feature version="${fcrepo-camel.version}">fcrepo-camel</feature>
<feature version="${project.version}">acrepo-jsonld-service</feature>
<bundle>mvn:edu.amherst.acdc/acrepo-jsonld-cache/${project.version}</bundle>
......@@ -144,7 +145,6 @@
<bundle>mvn:edu.amherst.acdc/acrepo-binding-memory/${project.version}</bundle>
</feature>
<feature name="acrepo-apix" version="${project.version}" resolver="(orb)" start-level="50">
<details>Installs the API-X Framework</details>
......@@ -154,4 +154,17 @@
<bundle>mvn:edu.amherst.acdc/acrepo-apix/${project.version}</bundle>
</feature>
<!-- Import service: Camel features it depends on, the service bundle, and its default configuration file -->
<feature name="acrepo-import-service" version="${project.version}" resolver="(orb)" start-level="50">
  <details>Installs the import service</details>

  <feature version="${camel.version}">camel</feature>
  <feature version="${camel.version}">camel-blueprint</feature>
  <feature version="${camel.version}">camel-jetty9</feature>
  <feature version="${camel.version}">camel-exec</feature>
  <feature version="${fcrepo-camel.version}">fcrepo-camel</feature>

  <bundle>mvn:edu.amherst.acdc/acrepo-import-service/${project.version}</bundle>

  <!-- Installed as etc/edu.amherst.acdc.importer.cfg from the artifact with type "cfg" and classifier "configuration" -->
  <configfile finalname="/etc/edu.amherst.acdc.importer.cfg">mvn:edu.amherst.acdc/acrepo-import-service/${project.version}/cfg/configuration</configfile>
</feature>
</features>
......@@ -81,6 +81,7 @@
<module>acrepo-jsonld-service</module>
<module>acrepo-jsonld-cache</module>
<module>acrepo-template-mustache</module>
<module>acrepo-import-service</module>
</modules>
<dependencyManagement>
......@@ -100,6 +101,11 @@
<artifactId>camel-jackson</artifactId>
<version>${camel.version}</version>
</dependency>
<!-- Managed version of the Camel exec component -->
<dependency>
  <groupId>org.apache.camel</groupId>
  <artifactId>camel-exec</artifactId>
  <version>${camel.version}</version>
</dependency>
<dependency>
<groupId>org.apache.camel</groupId>
<artifactId>camel-mustache</artifactId>
......