Commit 9391ac97 authored by UPADHYAY Prajna Devi's avatar UPADHYAY Prajna Devi
Browse files

Updated jar, war and docs/core_install.md

parent d4ff3e58
......@@ -6,7 +6,7 @@ This repository includes:
- the jar file `connection-lens-core-full-1.1-SNAPSHOT.jar`,
- the python `scripts` folder (these implement an entity extraction based on Flair),
- the `models` and `lib` folders which provides linguistic models used by StanfordNLP and TreTagger tools we build upon.
- the `lib` folder, which provides linguistic models used by the StanfordNLP and TreeTagger tools we build upon.
2. The `gui` folder with the file `gui.war` that allows us to run the web app.
......@@ -76,7 +76,7 @@ The example below ingests 5 small data sources of different formats into a graph
Run the following command from the **core** folder with the following options:
```
java -jar connection-lens-core-full-1.1-SNAPSHOT.jar -DRDBMSDBName=cl_myinstance -i ../data/poc/2/deputes.json,../data/poc/2/fb-etienne-chouard.txt,../data/poc/2/medias.txt,../data/poc/2/tweet-Ruffin.json,../data/poc/2/rt-wikipedia.txt
java -jar connection-lens-core-full-1.1-SNAPSHOT.jar -DRDBMS_DBName=cl_myinstance -i ../data/poc/2/deputes.json,../data/poc/2/fb-etienne-chouard.txt,../data/poc/2/medias.txt,../data/poc/2/tweet-Ruffin.json,../data/poc/2/rt-wikipedia.txt
```
![image_3.png](./docs/images/image_3.png)
......@@ -159,7 +159,7 @@ First, we can query the graph using an interactive, command-line interface.
After having loaded the graph as explained above, call the code with the following options:
```
java -jar connection-lens-core-full-1.1-SNAPSHOT.jar -DRDBMSDBName=cl_myinstance -n -v -a
java -jar connection-lens-core-full-1.1-SNAPSHOT.jar -DRDBMS_DBName=cl_myinstance -n -v -a
```
The `query>` indicates that the shell is ready to accept queries.
......@@ -172,7 +172,7 @@ The `query>` indicates that the shell is ready to accept queries.
Assuming a set of queries are written in a query file (one query per line), the following call:
```
java -jar connection-lens-core-full-1.1-SNAPSHOT.jar -DRDBMSDBName=cl_myinstance -n -qs -Q ../data/poc/2/demo.queries
java -jar connection-lens-core-full-1.1-SNAPSHOT.jar -DRDBMS_DBName=cl_myinstance -n -qs -Q ../data/poc/2/demo.queries
```
......@@ -181,7 +181,7 @@ will yield a set of statistics on each query: how long it took, how many answers
#### Sample queries on the small example
Russie - 2 AT
Russie - 3 AT
Ruffin - 8 ATs
......
rem ---------------------------------------------------------------------------
rem Windows configuration script for a ConnectionLens working directory.
rem
rem Usage: <this script> path\to\CL\working\directory
rem
rem Run it from the folder that contains core\, gui\ and requirements.txt,
rem because those are referenced through relative paths below.
rem
rem Steps:
rem   1. create the working directory with its cache and tmp sub-folders;
rem   2. create a Python virtual environment (cl_env) and install requirements;
rem   3. copy the TreeTagger binaries/libs/models, the Python scripts and the
rem      HeidelTime configuration into the working directory;
rem   4. generate local.settings (and complete config-heideltime.props) with
rem      absolute paths, then copy local.settings into core and gui.
rem ---------------------------------------------------------------------------

rem The working directory is mandatory: without it every path below would
rem resolve relative to the drive root.
IF "%~1"=="" (
  ECHO Usage: %~nx0 path\to\CL\working\directory
  EXIT /B 1
)

set "CURRENT_DIR=%cd%"
set "CACHE_DIR=cache"
set "TMP_DIR=tmp"
set "TT_DIR=treetagger"
set "PYTHON_SCRIPTS_DIR=scripts"
set "HEIDELTIME_DIR=heideltime"
rem Folder holding the Windows TreeTagger executables in core\lib\treetagger
set "TT_BIN=binWindows"
rem %~f1 strips surrounding quotes and expands the argument to a fully
rem qualified path, so every ABSOLUTEPATH_* value written below really is
rem absolute even when a relative directory is given.
set "CL_DIR=%~f1"

IF NOT EXIST "%CL_DIR%" mkdir "%CL_DIR%"
ECHO "CL_DIR is :'%CL_DIR%'"
IF NOT EXIST "%CL_DIR%\%CACHE_DIR%" mkdir "%CL_DIR%\%CACHE_DIR%"
IF NOT EXIST "%CL_DIR%\%TMP_DIR%" mkdir "%CL_DIR%\%TMP_DIR%"

rem configure the Python virtual environment
python -m venv "%CL_DIR%\cl_env"
"%CL_DIR%\cl_env\Scripts\python" -m pip install --upgrade pip
"%CL_DIR%\cl_env\Scripts\pip" install -r requirements.txt -f https://download.pytorch.org/whl/torch_stable.html

rem create the target folders
IF NOT EXIST "%CL_DIR%\%TT_DIR%\bin" mkdir "%CL_DIR%\%TT_DIR%\bin"
IF NOT EXIST "%CL_DIR%\%TT_DIR%\cmd" mkdir "%CL_DIR%\%TT_DIR%\cmd"
IF NOT EXIST "%CL_DIR%\%TT_DIR%\lib" mkdir "%CL_DIR%\%TT_DIR%\lib"
IF NOT EXIST "%CL_DIR%\%TT_DIR%\models" mkdir "%CL_DIR%\%TT_DIR%\models"
IF NOT EXIST "%CL_DIR%\%PYTHON_SCRIPTS_DIR%" mkdir "%CL_DIR%\%PYTHON_SCRIPTS_DIR%"
IF NOT EXIST "%CL_DIR%\%HEIDELTIME_DIR%" mkdir "%CL_DIR%\%HEIDELTIME_DIR%"

rem copy the resources; /Y avoids an interactive overwrite prompt when the
rem script is run again on an existing working directory
xcopy "core\lib\treetagger\%TT_BIN%" "%CL_DIR%\%TT_DIR%\bin" /Y
copy /Y "core\lib\treetagger\cmd\utf8-tokenize.perl" "%CL_DIR%\%TT_DIR%\cmd"
xcopy "core\lib\treetagger\lib" "%CL_DIR%\%TT_DIR%\lib" /Y
xcopy "core\lib\treetagger\models" "%CL_DIR%\%TT_DIR%\models" /Y
xcopy "core\scripts" "%CL_DIR%\%PYTHON_SCRIPTS_DIR%" /S /Y
copy /Y "core\lib\heideltime\config-heideltime-no-treetagger.props" "%CL_DIR%\%HEIDELTIME_DIR%\config-heideltime.props"
copy /Y "core\src\main\resources\parameter.settings" "%CL_DIR%\local.settings"

rem files completed below
set "SETTINGS=%CL_DIR%\local.settings"
set "HEIDELTIME_PROPS=%CL_DIR%\%HEIDELTIME_DIR%\config-heideltime.props"

set "ABSOLUTEPATH_MODELS=%CL_DIR%\%TT_DIR%\models"
set "ABSOLUTEPATH_TMP=%CL_DIR%\%TMP_DIR%"
set "ABSOLUTEPATH_CACHE=%CL_DIR%\%CACHE_DIR%"
set "ABSOLUTEPATH_PYTHON=%CL_DIR%\cl_env"
set "ABSOLUTEPATH_PYTHON_SCRIPTS=%CL_DIR%\%PYTHON_SCRIPTS_DIR%"
set "ABSOLUTEPATH_TT=%CL_DIR%\%TT_DIR%"
set "ABSOLUTEPATH_HEIDELTIME=%CL_DIR%\%HEIDELTIME_DIR%"

rem NOTE(review): the values below contain single backslashes; if
rem local.settings is read with java.util.Properties those are treated as
rem escape characters - confirm how the file is loaded.

rem In every "key=value" line the redirection is written BEFORE the ECHO so
rem that no trailing space ends up in the value stored in the file.

rem start the generated section of local.settings
(
  ECHO.
  ECHO.
  ECHO #### GENERATED PARAMETERS
  ECHO.
) >> "%SETTINGS%"
rem add to local.settings the absolute path to tmp directory
>>"%SETTINGS%" ECHO # Temporary directory for ConnectionLens
>>"%SETTINGS%" ECHO temp_dir=%ABSOLUTEPATH_TMP%
rem add to local.settings the absolute path to the cache
>>"%SETTINGS%" ECHO # ConnectionLens cache location
>>"%SETTINGS%" ECHO cache_location=%ABSOLUTEPATH_CACHE%
rem add to local.settings the absolute path to python; on Windows the venv
rem interpreter is in Scripts\ (the same folder used for pip above), not bin\
>>"%SETTINGS%" ECHO # Python location
>>"%SETTINGS%" ECHO python_path=%ABSOLUTEPATH_PYTHON%\Scripts\python.exe
rem add to local.settings the absolute path to python scripts
>>"%SETTINGS%" ECHO # Path to python scripts (flair and pdf)
>>"%SETTINGS%" ECHO python_script_location=%ABSOLUTEPATH_PYTHON_SCRIPTS%
rem add to local.settings the absolute paths to TreeTagger
>>"%SETTINGS%" ECHO # Treetagger location
>>"%SETTINGS%" ECHO treetagger_home=%ABSOLUTEPATH_TT%
>>"%SETTINGS%" ECHO # Stanford models location
>>"%SETTINGS%" ECHO stanford_models=%ABSOLUTEPATH_MODELS%
rem add to local.settings the path to heideltime's settings
>>"%SETTINGS%" ECHO # Configuration file used by HeidelTime (date extractor)
>>"%SETTINGS%" ECHO config_heideltime=%ABSOLUTEPATH_HEIDELTIME%\config-heideltime.props

rem add to config-heideltime.props the absolute path to TreeTagger
(
  ECHO.
  ECHO.
  ECHO #### GENERATED PARAMETERS
  ECHO.
) >> "%HEIDELTIME_PROPS%"
>>"%HEIDELTIME_PROPS%" ECHO treeTaggerHome=%ABSOLUTEPATH_TT%

rem publish the generated settings to the core and gui modules
copy /Y "%SETTINGS%" "core\src\main\resources"
copy /Y "%SETTINGS%" "gui\WebContent\WEB-INF"
#!/bin/bash
# Default layout of a ConnectionLens working directory. CL_DIR is the working
# directory itself; the remaining names are sub-folders created inside it.
CL_DIR=.
CURRENT_DIR=$(pwd)
TMP_DIR=tmp
CACHE_DIR=cache
PYTHON_SCRIPTS_DIR=scripts
HEIDELTIME_DIR=heideltime
TT_DIR=treetagger

# Select the folder holding the TreeTagger executables for this OS.
# Anything that is not macOS (Linux included) falls back to the Linux build.
case "$OSTYPE" in
  darwin*)
    TT_BIN=binMacos
    ;;
  *)
    TT_BIN=binLinux
    ;;
esac
echo "$OSTYPE"
usage() {
echo "Usage: ${0} -d <path/to/CL/working/directory>"
......@@ -45,48 +64,75 @@ mkdir -p $CL_DIR/$CACHE_DIR
mkdir -p $CL_DIR/$TMP_DIR
# configure Python venv
# Mac OSX or Linux OS
python3.6 -m venv $CL_DIR/cl_env
source $CL_DIR/cl_env/bin/activate
pip install --upgrade pip
pip install -r requirements.txt
python -m pip install --upgrade pip
pip install -r requirements.txt
deactivate
mkdir -p $CL_DIR/$TT_DIR/bin
mkdir -p $CL_DIR/$TT_DIR/cmd
mkdir -p $CL_DIR/$TT_DIR/lib
mkdir -p $CL_DIR/$TT_DIR/models
cp -R core/lib/treetagger/bin/. $CL_DIR/$TT_DIR/bin/.
mkdir -p $CL_DIR/$HEIDELTIME_DIR
mkdir -p $CL_DIR/$PYTHON_SCRIPTS_DIR
cp -R core/lib/treetagger/$TT_BIN/. $CL_DIR/$TT_DIR/bin/.
cp core/lib/treetagger/cmd/utf8-tokenize.perl $CL_DIR/$TT_DIR/cmd/.
#cp core/lib/treetagger/lib/french-abbreviations $CL_DIR/$TT_DIR/lib/.
#cp core/lib/treetagger/lib/french.par $CL_DIR/$TT_DIR/lib/.
#cp core/lib/treetagger/lib/english-abbreviations $CL_DIR/$TT_DIR/lib/.
#cp core/lib/treetagger/lib/english.par $CL_DIR/$TT_DIR/lib/.
#cp core/lib/treetagger/lib/english-utf8.par $CL_DIR/$TT_DIR/lib/.
cp -R core/models/. $CL_DIR/$TT_DIR/models/.
cp -R core/lib/treetagger/lib/. $CL_DIR/$TT_DIR/lib/.
cp -R core/lib/treetagger/models/. $CL_DIR/$TT_DIR/models/.
cp core/lib/heideltime/config-heideltime-no-treetagger.props $CL_DIR/$HEIDELTIME_DIR/config-heideltime.props
cp -R core/scripts/. $CL_DIR/$PYTHON_SCRIPTS_DIR/.
cp core/src/main/resources/ciserver.settings $CL_DIR/.
mv $CL_DIR/ciserver.settings $CL_DIR/local.settings
cp core/src/main/resources/parameter.settings $CL_DIR/local.settings
ABSOLUTEPATH_MODELS=$(realpath $CL_DIR/$TT_DIR/models)
ABSOLUTEPATH_TMP=$(realpath $CL_DIR/$TMP_DIR)
ABSOLUTEPATH_CACHE=$(realpath $CL_DIR/$CACHE_DIR)
ABSOLUTEPATH_PYTHON=$(realpath $CL_DIR/cl_env)
ABSOLUTEPATH_TT=$(realpath $CL_DIR/$TT_DIR)
ABSOLUTEPATH_HEIDELTIME=$(realpath $CL_DIR/$HEIDELTIME_DIR)
ABSOLUTEPATH_PYTHON_SCRIPTS=$(realpath $CL_DIR/scripts)
# Open the generated section of local.settings with a banner framed by blank
# lines. printf is used rather than echo because bash's builtin echo does not
# expand "\n" without -e and would write the backslash sequences literally.
printf '\n\n#### GENERATED PARAMETERS\n\n' >> "$CL_DIR/local.settings"
# add to local.settings the absolute path to tmp directory
echo "# Temporary directory for ConnectionLens" >> $CL_DIR/local.settings
echo "temp_dir=${ABSOLUTEPATH_TMP}" >> $CL_DIR/local.settings
# add to local.settings the absolute path to the cache
echo "# ConnectionLens cache location" >> $CL_DIR/local.settings
echo "cache_location=${ABSOLUTEPATH_CACHE}" >> $CL_DIR/local.settings
# add to local.settings the absolute path to python
echo "# Python location" >> $CL_DIR/local.settings
echo "python_path=${ABSOLUTEPATH_PYTHON}/bin/python" >> $CL_DIR/local.settings
# add to local.settings the absolute path to python scripts
echo "# Path to python scripts (flair and pdf)" >> $CL_DIR/local.settings
echo "python_script_location=${ABSOLUTEPATH_PYTHON_SCRIPTS}" >> $CL_DIR/local.settings
# replace path to models with an absolute path
sed -i -e "s#stanford_models.*#stanford_models=${ABSOLUTEPATH_MODELS}#" $CL_DIR/local.settings
# add to local.settings the absolute paths to TreeTagger
echo "# Treetagger location" >> $CL_DIR/local.settings
echo "treetagger_home=${ABSOLUTEPATH_TT}" >> $CL_DIR/local.settings
# replace path to tmp with an absolute path
sed -i -e "s#temp_dir.*#temp_dir=${ABSOLUTEPATH_TMP}#" $CL_DIR/local.settings
echo "# Stanford models location" >> $CL_DIR/local.settings
echo "stanford_models=${ABSOLUTEPATH_MODELS}" >> $CL_DIR/local.settings
# replace path to cache with an absolute path
sed -i -e "s#cache_location.*#cache_location=${ABSOLUTEPATH_CACHE}#" $CL_DIR/local.settings
# add to local.settings the path to heideltime's settings
echo "# Configuration file used by HeidelTime (date extractor)" >> $CL_DIR/local.settings
echo "config_heideltime=${ABSOLUTEPATH_HEIDELTIME}/config-heideltime.props" >> $CL_DIR/local.settings
# replace path to python with an absolute path
sed -i -e "s#python_path.*#python_path=${ABSOLUTEPATH_PYTHON}\/bin\/python3.6#" $CL_DIR/local.settings
# add to config-heideltime.props the absolute path to TreeTagger
# (printf rather than echo: bash's builtin echo does not expand "\n" without
# -e, so the banner would otherwise contain literal backslash sequences)
printf '\n\n#### GENERATED PARAMETERS\n\n' >> "$CL_DIR/$HEIDELTIME_DIR/config-heideltime.props"
printf 'treeTaggerHome=%s\n' "${ABSOLUTEPATH_TT}" >> "$CL_DIR/$HEIDELTIME_DIR/config-heideltime.props"
# replace path to treetagger with an absolute path
sed -i -e "s#treetagger_home.*#treetagger_home=${ABSOLUTEPATH_TT}#" $CL_DIR/local.settings
cp $CL_DIR/local.settings core/src/main/resources/
cp $CL_DIR/local.settings core/src/main/resources
cp $CL_DIR/local.settings gui/WebContent/WEB-INF
This diff is collapsed.
## Ioana Manolescu, April 10, 2021: derived this file from HeidelTime's props by removing the line that defines treeTaggerHome.
## That (crucial) variable is set by configure.sh.
################################
## MAIN ##
################################
# Consideration of different timex3-types
# Date
considerDate = true
# Duration
considerDuration = false
# Set
considerSet = false
# Time
considerTime = false
# Temponyms (make sure you know what you are doing if you set this to "true")
considerTemponym = false
# This one is only necessary if you want to process Chinese documents.
chineseTokenizerPath = SET ME IN CONFIG.PROPS! (e.g., /home/jannik/treetagger/chinese-tokenizer)
##################################
# paths to JVnTextPro model paths:
##################################
sent_model_path = SET ME IN CONFIG.PROPS! (e.g., /home/jannik/jvntextpro/models/jvnsensegmenter)
word_model_path = SET ME IN CONFIG.PROPS! (e.g., /home/jannik/jvntextpro/models/jvnsegmenter)
pos_model_path = SET ME IN CONFIG.PROPS! (e.g., /home/jannik/jvntextpro/models/jvnpostag/maxent)
#####################################################
# paths to Stanford POS Tagger model or config files:
#####################################################
model_path = SET ME IN CONFIG.PROPS! (e.g., /home/jannik/stanford-postagger-full-2014-01-04/models/arabic.tagger)
# leave this unset if you do not need one (e.g., /home/jannik/stanford-postagger-full-2014-01-04/tagger.config)
config_path =
########################################
## paths to hunpos and its tagger files:
########################################
hunpos_path = SET ME IN CONFIG.PROPS! (e.g., /home/jannik/hunpos)
hunpos_model_name = SET ME IN CONFIG.PROPS! (e.g., model.hunpos.mte5.defnpout)
# DO NOT CHANGE THE FOLLOWING
################################
# Relative path of type system in HeidelTime home directory
typeSystemHome = desc/type/HeidelTime_TypeSystem.xml
# Relative path of dkpro type system in HeidelTime home directory
typeSystemHome_DKPro = desc/type/DKPro_TypeSystem.xml
# Name of uima-context variables...
# ...for date-consideration
uimaVarDate = Date
# ...for duration-consideration
uimaVarDuration = Duration
# ...for language
uimaVarLanguage = Language
# ...for set-consideration
uimaVarSet = Set
# ...for time-consideration
uimaVarTime = Time
# ...for temponym-consideration
uimaVarTemponym = Temponym
# ...for type to process
uimaVarTypeToProcess = Type
@echo off
rem chunk-english: tokenize, tag and chunk an English text with TreeTagger.
rem
rem Usage: chunk-english file {file}
rem   %1  input text file
rem   %2  optional output file; when omitted the result is written to stdout
rem
rem TreeTagger is expected under C:\TreeTagger (bin, cmd and lib sub-folders).

rem Keep TAGDIR, BIN, CMD, LIB, ... local to this script so that variables of
rem the same name in the caller's environment (e.g. LIB) are not overwritten.
setlocal

set TAGDIR=C:\TreeTagger

set BIN=%TAGDIR%\bin
set CMD=%TAGDIR%\cmd
set LIB=%TAGDIR%\lib

rem options of the tagging passes and of the chunking pass
set TAGOPT=%LIB%\english.par -quiet -token -lemma -sgml -no-unknown
set CHUNKOPT=%LIB%\english-chunker.par -token -sgml -eps 0.00000001 -hyphen-heuristics
rem perl one-liner run between the first tagging pass and the chunker:
rem single-field lines are printed unchanged, others as "field0-field1"
set OPT=-nae "if ($#F==0){print}else{print \"$F[0]-$F[1]\n\"}"

rem %~1 / %~2 strip surrounding quotes, so quoted file names containing
rem spaces do not break the emptiness checks.
if "%~2"=="" goto label1
perl %CMD%\utf8-tokenize.perl -e -a %LIB%\english-abbreviations "%~1" | %BIN%\tree-tagger %TAGOPT% | perl %OPT% | %BIN%\tree-tagger %CHUNKOPT% | perl %CMD%\filter-chunker-output.perl | %BIN%\tree-tagger %TAGOPT% > "%~2"
goto end

:label1
if "%~1"=="" goto label2
perl %CMD%\utf8-tokenize.perl -e -a %LIB%\english-abbreviations "%~1" | %BIN%\tree-tagger %TAGOPT% | perl %OPT% | %BIN%\tree-tagger %CHUNKOPT% | perl %CMD%\filter-chunker-output.perl | %BIN%\tree-tagger %TAGOPT%
goto end

:label2
echo.
echo Usage: chunk-english file {file}
echo.

:end
@echo off
rem chunk-french: tokenize, tag and chunk a French text with TreeTagger.
rem
rem Usage: chunk-french file {file}
rem   %1  input text file
rem   %2  optional output file; when omitted the result is written to stdout
rem
rem TreeTagger is expected under C:\TreeTagger (bin, cmd and lib sub-folders).

rem Keep TAGDIR, BIN, CMD, LIB, ... local to this script so that variables of
rem the same name in the caller's environment (e.g. LIB) are not overwritten.
setlocal

set TAGDIR=C:\TreeTagger

set BIN=%TAGDIR%\bin
set CMD=%TAGDIR%\cmd
set LIB=%TAGDIR%\lib

rem options of the tagging passes and of the chunking pass
set TAGOPT=%LIB%\french.par -quiet -token -lemma -sgml -no-unknown
set CHUNKOPT=%LIB%\french-chunker.par -token -sgml -eps 0.00000001 -hyphen-heuristics
rem perl one-liner run between the first tagging pass and the chunker:
rem single-field lines are printed unchanged, others as "field0-field1"
set OPT=-nae "if ($#F==0){print}else{print \"$F[0]-$F[1]\n\"}"

rem %~1 / %~2 strip surrounding quotes, so quoted file names containing
rem spaces do not break the emptiness checks.
rem Both branches run the same pipeline (including the French chunker output
rem filter); they differ only in where the result is written.
if "%~2"=="" goto label1
perl %CMD%\utf8-tokenize.perl -f -a %LIB%\french-abbreviations "%~1" | %BIN%\tree-tagger %TAGOPT% | perl %OPT% | %BIN%\tree-tagger %CHUNKOPT% | perl %CMD%\filter-chunker-output-french.perl | %BIN%\tree-tagger %TAGOPT% > "%~2"
goto end

:label1
if "%~1"=="" goto label2
perl %CMD%\utf8-tokenize.perl -f -a %LIB%\french-abbreviations "%~1" | %BIN%\tree-tagger %TAGOPT% | perl %OPT% | %BIN%\tree-tagger %CHUNKOPT% | perl %CMD%\filter-chunker-output-french.perl | %BIN%\tree-tagger %TAGOPT%
goto end

:label2
echo.
echo Usage: chunk-french file {file}
echo.

:end
@echo off
rem tag-english: tokenize and tag an English text with TreeTagger.
rem
rem Usage: tag-english file {file}
rem   %1  input text file
rem   %2  optional output file; when omitted the result is written to stdout
rem
rem TreeTagger is expected under C:\TreeTagger (bin, cmd and lib sub-folders).

rem Keep TAGDIR, BIN, CMD, LIB and TAGOPT local to this script so that
rem variables of the same name in the caller's environment (e.g. LIB) are
rem not overwritten.
setlocal

set TAGDIR=C:\TreeTagger

set BIN=%TAGDIR%\bin
set CMD=%TAGDIR%\cmd
set LIB=%TAGDIR%\lib
set TAGOPT=%LIB%\english.par -token -lemma -sgml -no-unknown

rem %~1 / %~2 strip surrounding quotes, so quoted file names containing
rem spaces do not break the emptiness checks.
if "%~2"=="" goto label1
perl %CMD%\utf8-tokenize.perl -e -a %LIB%\english-abbreviations "%~1" | %BIN%\tree-tagger %TAGOPT% > "%~2"
goto end

:label1
if "%~1"=="" goto label2
perl %CMD%\utf8-tokenize.perl -e -a %LIB%\english-abbreviations "%~1" | %BIN%\tree-tagger %TAGOPT%
goto end

:label2
echo.
echo Usage: tag-english file {file}
echo.

:end
@echo off
rem tag-french: tokenize and tag a French text with TreeTagger.
rem
rem Usage: tag-french file {file}
rem   %1  input text file
rem   %2  optional output file; when omitted the result is written to stdout
rem
rem TreeTagger is expected under C:\TreeTagger (bin, cmd and lib sub-folders).

rem Keep TAGDIR, BIN, CMD, LIB and TAGOPT local to this script so that
rem variables of the same name in the caller's environment (e.g. LIB) are
rem not overwritten.
setlocal

set TAGDIR=C:\TreeTagger

set BIN=%TAGDIR%\bin
set CMD=%TAGDIR%\cmd
set LIB=%TAGDIR%\lib
set TAGOPT=%LIB%\french.par -token -lemma -sgml -no-unknown

rem %~1 / %~2 strip surrounding quotes, so quoted file names containing
rem spaces do not break the emptiness checks.
if "%~2"=="" goto label1
perl %CMD%\utf8-tokenize.perl -f -a %LIB%\french-abbreviations "%~1" | %BIN%\tree-tagger %TAGOPT% > "%~2"
goto end

:label1
if "%~1"=="" goto label2
perl %CMD%\utf8-tokenize.perl -f -a %LIB%\french-abbreviations "%~1" | %BIN%\tree-tagger %TAGOPT%
goto end

:label2
echo.
echo Usage: tag-french file {file}
echo.

:end
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment