Commit a4e894f2 authored by Robin Tissot
Browse files

Merge branch 'develop'

parents 37206020 3f81cc4a
......@@ -23,7 +23,7 @@ RUN apt-get update
RUN addgroup --system uwsgi
RUN adduser --system --no-create-home --ingroup uwsgi uwsgi
RUN apt-get install netcat-traditional jpegoptim pngcrush
RUN apt-get install netcat-traditional jpegoptim pngcrush libvips
RUN pip install --upgrade pip
......
......@@ -5,8 +5,8 @@ import re
import requests
import time
import uuid
import warnings
import zipfile
import pyvips
from lxml import etree
from django.conf import settings
......@@ -66,6 +66,47 @@ class ParserDocument:
return transcription
class PdfParser(ParserDocument):
    """Turn each page of an uploaded PDF into a DocumentPart image.

    The PDF is rendered through pyvips at 300 dpi with ``n=-1``. The code
    below treats the result as a single tall image in which the pages are
    stacked vertically and all share the same height.
    """

    def _load_doc(self):
        """Render the uploaded file into a pyvips image and return it."""
        # validate() and parse() each consume the whole file; rewind first so
        # the second reader does not receive an empty buffer.
        rewind = getattr(self.file, 'seek', None)
        if callable(rewind):
            rewind(0)
        return pyvips.Image.new_from_buffer(self.file.read(), "",
                                            dpi=300, n=-1, access="sequential")

    def validate(self):
        """Check that pyvips is able to load the uploaded file.

        Stores the loaded image on ``self.doc``.

        Raises:
            ParseError: if pyvips reports an error while loading the buffer.
        """
        try:
            self.doc = self._load_doc()
        except pyvips.error.Error as e:
            logger.exception(e)
            raise ParseError(_("Invalid pdf file.")) from e

    @property
    def total(self):
        """Page count read from the 'n-pages' field of ``self.doc``.

        Returns 0 when the loaded image does not carry that field.
        Requires ``self.doc`` to have been set by validate() or parse().
        """
        if 'n-pages' in self.doc.get_fields():
            return self.doc.get('n-pages')
        return 0

    def parse(self, start_at=0, override=False, user=None):
        """Yield one saved DocumentPart per PDF page.

        Args:
            start_at: zero-based index of the first page to import; earlier
                pages are skipped.
            override: accepted for signature compatibility, not used here.
            user: accepted for signature compatibility, not used here.

        Yields:
            DocumentPart: the part created for each page, after its PNG image
            and the part itself have been saved.

        A pyvips error raised while processing pages is logged as a warning
        and appended to ``self.report`` (when set) instead of propagating.
        """
        self.doc = self._load_doc()

        try:
            # pyvips operations return a new image rather than modifying the
            # receiver, so the flattened result has to be kept and used.
            flattened = self.doc.flatten(background=255)
            n_pages = self.doc.get('n-pages')
            page_width = flattened.width
            # crop() takes pixel coordinates: keep the page height integral.
            page_height = flattened.height // n_pages

            for i in range(start_at, n_pages):
                page = flattened.crop(0, i * page_height, page_width, page_height)
                part = DocumentPart(document=self.document)
                part.image.save('%s_page_%d.png' % (self.file.name, i+1),
                                ContentFile(page.write_to_buffer('.png')))
                part.save()
                yield part
        except pyvips.error.Error as e:
            msg = _("Parse error in {filename}: {error}, skipping it.").format(
                filename=self.file.name, error=e.args[0]
            )
            logger.warning(msg)
            if self.report:
                self.report.append(msg)
class ZipParser(ParserDocument):
"""
For now only deals with a flat list of Alto files
......@@ -684,6 +725,8 @@ def make_parser(document, file_handler, name=None, report=None):
return IIIFManifestParser(document, file_handler, report)
elif ext == "zip":
return ZipParser(document, file_handler, report, transcription_name=name)
elif ext == "pdf":
return PdfParser(document, file_handler, report)
else:
raise ValueError(
"Invalid extension for the file to be parsed %s." % file_handler.name
......
/*
Baseline editor
a javascript based baseline segmentation editor,
requires paper.js and colorThief is optional.
requires paper.js.
Usage:
var segmenter = new Segmenter(img, options);
......@@ -13,6 +13,7 @@
deletePointBtn=null,
deleteSelectionBtn=null,
toggleMasksBtn=null,
toggleLineModeBtn=null,
splitBtn=null,
mergeBtn=null,
......@@ -155,7 +156,7 @@ class SegmenterLine {
this.baseline = baseline.map(pt=>[Math.round(pt[0]), Math.round(pt[1])]);
this.baselinePath = new Path({
strokeColor: this.segmenter.baselinesColor,
strokeWidth: 5/this.segmenter.getRatio(),
// strokeWidth: 5/this.segmenter.getRatio(),
strokeCap: 'butt',
selectedColor: 'black',
opacity: 0.5,
......@@ -187,9 +188,20 @@ class SegmenterLine {
this.showOrdering();
this.showDirection();
if (this.baselinePath) {
this.baselinePath.strokeWidth = 5/this.segmenter.getRatio();
if (this.segmenter.wideLineStrokes) {
this.baselinePath.strokeWidth = 5/this.segmenter.getRatio();
} else {
this.baselinePath.strokeWidth = 1;
}
}
if (this.directionHint) {
if (this.segmenter.showDirectionHint) {
this.directionHint.visible = true;
this.directionHint.strokeColor = this.hintColor;
} else {
this.directionHint.visible = false;
}
}
if (this.directionHint) this.directionHint.strokeColor = this.hintColor;
}
getMaskColor() {
......@@ -471,6 +483,7 @@ class Segmenter {
regionTypes=['Title', 'Main', 'Marginal', 'Illustration', 'Numbering'],
lineTypes=['Main', 'Interlinear'],
wideLineStrokes=true,
// todo: choose keyboard shortcuts
inactiveLayerOpacity=0.5,
......@@ -527,6 +540,8 @@ class Segmenter {
this.regionAreaThreshold = regionAreaThreshold;
this.showMasks = false;
this.showLineNumbers = false;
this.showDirectionHint = true;
this.wideLineStrokes = wideLineStrokes;
this.selecting = null;
this.spliting = false;
......@@ -534,6 +549,7 @@ class Segmenter {
// menu btns
this.toggleMasksBtn = document.getElementById('be-toggle-masks');
this.toggleLineModeBtn = document.getElementById('be-toggle-line-mode');
this.toggleOrderingBtn = document.getElementById('be-toggle-order');
this.toggleRegionModeBtn = document.getElementById('be-toggle-regions');
this.splitBtn = document.getElementById('be-split-lines');
......@@ -641,6 +657,11 @@ class Segmenter {
this.toggleMasks();
}.bind(this));
}
if (this.toggleLineModeBtn) {
this.toggleLineModeBtn.addEventListener('click', function(event) {
this.toggleLineMode();
}.bind(this));
}
if (this.splitBtn) this.splitBtn.addEventListener('click', function(event) {
this.spliting = !this.spliting;
......@@ -734,7 +755,7 @@ class Segmenter {
} else if (event.keyCode == 74) { // J (for join)
this.mergeSelection();
} else if (event.keyCode == 77) { // M
this.toggleMasks();
this.toggleLineMode();
} else if (event.keyCode == 76) { // L
this.toggleOrdering();
} else if (event.keyCode == 82) { // R
......@@ -1499,14 +1520,56 @@ class Segmenter {
}
}
toggleMasks(force) {
this.showMasks = force || !this.showMasks;
if (this.showMasks) {
this.toggleMasksBtn.classList.add('btn-success');
this.toggleMasksBtn.classList.remove('btn-info');
toggleLineMode() {
// wide: default mode
// mask: show the boundary of the line
// slim: set the strokeWidth at 1px
if (this.showMasks) { // mask -> slim
this.toggleMasks(false);
this.toggleLineStrokes(false);
this.toggleLineModeBtn.classList.add('btn-secondary');
this.toggleLineModeBtn.classList.remove('btn-success');
} else {
this.toggleMasksBtn.classList.add('btn-info');
this.toggleMasksBtn.classList.remove('btn-success');
if (!this.wideLineStrokes) { // slim -> wide
this.toggleMasks(false);
this.toggleLineStrokes(true);
this.toggleLineModeBtn.classList.add('btn-info');
this.toggleLineModeBtn.classList.remove('btn-secondary');
} else { // wide -> mask
this.toggleMasks(true);
this.toggleLineStrokes(true);
this.toggleLineModeBtn.classList.add('btn-success');
this.toggleLineModeBtn.classList.remove('btn-info');
}
}
}
toggleLineStrokes(force) {
// wide / slim
if (force != undefined) this.wideLineStrokes = force;
else this.wideLineStrokes = !this.wideLineStrokes;
this.showDirectionHint = this.wideLineStrokes;
for (let i in this.lines) {
this.lines[i].refresh();
}
}
toggleMasks(force) {
if (force !== undefined) this.showMasks = force;
else this.showMasks = !this.showMasks;
if (this.toggleMasksBtn) {
if (this.showMasks) {
this.toggleMasksBtn.classList.add('btn-success');
this.toggleMasksBtn.classList.remove('btn-info');
} else {
this.toggleMasksBtn.classList.add('btn-info');
this.toggleMasksBtn.classList.remove('btn-success');
}
}
for (let i in this.lines) {
let poly = this.lines[i].maskPath;
......
......@@ -30,7 +30,8 @@
<i class="fas fa-file-import mr-1"></i>{% trans "Import" %}
</button>
<div class="dropdown-menu">
<a data-proc="import-iiif" class="js-proc-selected dropdown-item" href="#">Images (IIIF)</a>
<a data-proc="import-iiif" class="js-proc-selected dropdown-item" href="#">Images (IIIF)</a>
<a data-proc="import-pdf" class="js-proc-selected dropdown-item" href="#">Images (PDF)</a>
<a data-proc="import-xml" class="js-proc-selected dropdown-item" href="#">Transcriptions (XML)</a>
</div>
</div>
......@@ -103,6 +104,7 @@
{# Process wizards #}
{% include 'core/wizards/import_iiif.html' with proc='import-iiif' %}
{% include 'core/wizards/import_file.html' with proc='import-xml' %}
{% include 'core/wizards/import_pdf.html' with proc='import-pdf' %}
{% include 'core/wizards/export.html' with proc='export' %}
{% include 'core/wizards/binarize.html' with proc='binarize' %}
{% include 'core/wizards/segment.html' with proc='segment' %}
......
......@@ -248,8 +248,8 @@
title="{% trans "Toggle ordering display. (L)" %}"
class="btn btn-sm btn-info fas fa-sort-numeric-down"
autocomplete="off"></button>
<button id="be-toggle-masks"
title="{% trans "Show line masks. (M)" %}"
<button id="be-toggle-line-mode"
title="{% trans "Toggle line masks and stroke width. (M)" %}"
class="btn btn-sm btn-info fas fa-mask"></button>
</div>
<div class="btn-group">
......@@ -316,7 +316,7 @@
v-bind:key="'sL' + line.pk"/>
</div>
<img class="panel-img"/>
<img class="panel-img" v-bind:src="imageSrc"/>
{# TODO: make line overlay component #}
<div id="segmentation-overlay" class="overlay panel-overlay">
<svg width="100%" height="100%">
......
......@@ -2,7 +2,7 @@
{% load i18n bootstrap %}
{% block wizard_help %}
<p>{% blocktrans with opentag='<a target="_blank" href="https://gitlab.inria.fr/scripta/escriptorium/wikis/imports">'|safe closetag='</a>'|safe %}See {{opentag}}the import documentation{{closetag}}(DEPRECATED) for details.{% endblocktrans %}</p>
<p>{% blocktrans with opentag='<a target="_blank" href="https://gitlab.inria.fr/scripta/escriptorium/wikis/imports">'|safe closetag='</a>'|safe %}See {{opentag}}the import documentation{{closetag}} for details.{% endblocktrans %}</p>
{% endblock %}
{% block wizard_action %}action="{% url 'api:document-imports' pk=object.pk %}"{% endblock %}
......
{% extends 'core/wizards/import.html' %}
{% load i18n bootstrap %}
{# Import wizard for PDF files; builds on the generic import wizard template. #}
{# The selected_images block is overridden with empty content. #}
{% block selected_images %}{% endblock %}
{# The form is submitted to the document's imports API endpoint. #}
{% block wizard_action %}action="{% url 'api:document-imports' pk=object.pk %}"{% endblock %}
{% block wizard_fields %}
{{ block.super }}
{# The upload field is rendered only while no import is ongoing. #}
{% if not import_form.current_import.ongoing %}
<h5>{% trans "Import images from a PDF document." %}</h5>
<div class="form-group">
{# accept=".pdf" restricts the browser file picker to PDF files. #}
{% render_field import_form.upload_file class="js-proc-settings" accept=".pdf" %}
</div>
{% endif %}
{% endblock %}
{% block wizard_submit %}{% trans "Start importing" %}{% endblock %}
......@@ -12,7 +12,7 @@ django-redis==4.10.0
psycopg2-binary==2.7.6
django-ordered-model==3.1.1
easy-thumbnails==2.5
git+https://github.com/mittagessen/kraken.git@3.0b13#egg=kraken
git+https://github.com/mittagessen/kraken.git@3.0b14#egg=kraken
django-cleanup==3.0.1
djangorestframework==3.9.2
drf-nested-routers==0.91
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment