Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 6c6bdabf authored by Robin Tissot's avatar Robin Tissot
Browse files

Merge branch 'develop'

parents 500b754f 7aea8244
Branches
Tags v0.5
No related merge requests found
Showing
with 227 additions and 92 deletions
......@@ -61,6 +61,7 @@ class DocumentSerializer(serializers.ModelSerializer):
class PartSerializer(serializers.ModelSerializer):
image = ImageField(thumbnails=['card', 'large'])
filename = serializers.CharField(read_only=True)
bw_image = ImageField(thumbnails=['large'], required=False)
workflow = serializers.JSONField(read_only=True)
transcription_progress = serializers.IntegerField(read_only=True)
......@@ -70,6 +71,7 @@ class PartSerializer(serializers.ModelSerializer):
fields = (
'pk',
'name',
'filename',
'title',
'typology',
'image',
......@@ -82,6 +84,7 @@ class PartSerializer(serializers.ModelSerializer):
def create(self, data):
document = Document.objects.get(pk=self.context["view"].kwargs["document_pk"])
data['document'] = document
data['original_filename'] = data['image'].name
obj = super().create(data)
# generate card thumbnail right away since we need it
get_thumbnailer(obj.image).get_thumbnail(settings.THUMBNAIL_ALIASES['']['card'])
......@@ -111,7 +114,7 @@ class LineTranscriptionSerializer(serializers.ModelSerializer):
class LineSerializer(serializers.ModelSerializer):
class Meta:
model = Line
fields = ('pk', 'order', 'block', 'box')
fields = ('pk', 'document_part', 'order', 'block', 'box')
def create(self, validated_data):
instance = super().create(validated_data)
......@@ -128,7 +131,7 @@ class DetailedLineSerializer(LineSerializer):
transcriptions = LineTranscriptionSerializer(many=True, required=False)
class Meta(LineSerializer.Meta):
fields = LineSerializer.Meta.fields + ('document_part', 'block', 'transcriptions',)
fields = LineSerializer.Meta.fields + ('transcriptions',)
class PartDetailSerializer(PartSerializer):
......
......@@ -188,7 +188,7 @@ class LineViewSetTestCase(CoreFactoryTestCase):
uri = reverse('api:line-list',
kwargs={'document_pk': self.part.document.pk,
'part_pk': self.part.pk})
with self.assertNumQueries(13):
with self.assertNumQueries(12):
resp = self.client.post(uri, {
'document_part': self.part.pk,
'box': '[10, 10, 50, 50]'
......
from django.db.models import Prefetch
from django.shortcuts import render
import itertools
from django.db.models import Prefetch, Count
from django.http import StreamingHttpResponse
from django.template import loader
from django.utils.text import slugify
from rest_framework.decorators import action
......@@ -36,7 +39,6 @@ class DocumentViewSet(ModelViewSet):
try:
form.process()
except ParseError as e:
return error("Incorrectly formated file, couldn't parse it.")
return Response({'status': 'ok'})
else:
......@@ -53,16 +55,7 @@ class DocumentViewSet(ModelViewSet):
return Response({'status': 'already canceled'}, status=400)
@action(detail=True, methods=['get'])
def export(self, request, pk=None):
def fetch_by_batch(queryset, start=0, size=200):
while True:
results = queryset[start:start+size]
for result in results:
yield result
if len(results) < size:
break
start += size
def export(self, request, pk=None):
format_ = request.GET.get('as', 'text')
try:
transcription = Transcription.objects.get(
......@@ -70,37 +63,53 @@ class DocumentViewSet(ModelViewSet):
except Transcription.DoesNotExist:
return Response({'error': "Object 'transcription' is required."}, status=status.HTTP_400_BAD_REQUEST)
self.object = self.get_object()
if format_ == 'text':
template = 'core/export/simple.txt'
content_type = 'text/plain'
extension = 'txt'
lines = (LineTranscription.objects.filter(transcription=transcription)
.order_by('line__document_part', 'line__document_part__order', 'line__order')
.select_related('line', 'line__document_part', 'line__block'))
context = {'lines': fetch_by_batch(lines)}
.order_by('line__document_part', 'line__document_part__order', 'line__order'))
response = StreamingHttpResponse(['%s\n' % line.content for line in lines],
content_type=content_type)
elif format_ == 'alto':
template = 'core/export/alto.xml'
content_type = 'text/xml'
extension = 'xml'
lines = (Line.objects
.filter(document_part__document=pk)
.select_related('document_part', 'block')
.order_by('document_part', 'block', 'order')
.prefetch_related(
Prefetch('transcriptions',
to_attr='transcription',
queryset=LineTranscription.objects.filter(transcription=transcription))))
context = {'lines': fetch_by_batch(lines)}
document = self.get_object()
part_pks = document.parts.values_list('pk', flat=True)
start = loader.get_template('core/export/alto_start.xml').render()
end = loader.get_template('core/export/alto_end.xml').render()
part_tmpl = loader.get_template('core/export/alto_part.xml')
response = StreamingHttpResponse(itertools.chain([start],
[part_tmpl.render({
'part': self.get_part_data(pk, transcription),
'counter': i})
for i, pk in enumerate(part_pks)],
[end]),
content_type=content_type)
else:
return Response({'error': 'Invalid format.'}, status=status.HTTP_400_BAD_REQUEST)
response = render(request, template,
context=context,
content_type=content_type)
response['Content-Disposition'] = 'attachment; filename="export-%s-%s.%s"' % (
slugify(self.object.name), datetime.now().isoformat()[:16], extension)
return response
def get_part_data(self, part_pk, transcription):
    """Load one document part together with the data used to export it.

    The returned part carries:
    - ``orphan_blocks``: the part's blocks that have no line attached;
    - ``lines``: the part's lines, each with its block joined in and a
      ``transcription`` list limited to the given transcription.

    The part is looked up with ``.get(pk=part_pk)``.
    """
    # Only the line contents belonging to the requested transcription.
    line_transcriptions = LineTranscription.objects.filter(transcription=transcription)
    lines = (Line.objects.all()
             .select_related('block')
             .prefetch_related(
                 Prefetch('transcriptions',
                          to_attr='transcription',
                          queryset=line_transcriptions)))
    # Blocks whose line count is zero.
    empty_blocks = (Block.objects
                    .annotate(num_lines=Count("line"))
                    .filter(num_lines=0))
    parts = DocumentPart.objects.prefetch_related(
        Prefetch('blocks', to_attr='orphan_blocks', queryset=empty_blocks),
        Prefetch('lines', queryset=lines))
    return parts.get(pk=part_pk)
class PartViewSet(ModelViewSet):
......@@ -151,6 +160,12 @@ class LineViewSet(ModelViewSet):
queryset = Line.objects.all().select_related('block').prefetch_related('transcriptions__transcription')
serializer_class = DetailedLineSerializer
def get_serializer_class(self):
    """Pick the serializer class for the current action.

    The 'retrieve' and 'list' actions use the detailed serializer; any
    other action (e.g. create) uses the plain line serializer.
    """
    read_actions = ('retrieve', 'list')
    if self.action not in read_actions:
        return LineSerializer
    return DetailedLineSerializer
class LargeResultsSetPagination(PageNumberPagination):
page_size = 100
......
# Generated by Django 2.1.4 on 2019-05-13 13:20
from django.db import migrations, models
# Schema migration: adds the `original_filename` field to the `documentpart` model.
class Migration(migrations.Migration):
# This migration is applied after migration 0021 of the 'core' app.
dependencies = [
('core', '0021_auto_20190507_1304'),
]
operations = [
# New character field, may be left blank, at most 1024 characters.
migrations.AddField(
model_name='documentpart',
name='original_filename',
field=models.CharField(blank=True, max_length=1024),
),
]
......@@ -228,6 +228,7 @@ class DocumentPart(OrderedModel):
"""
name = models.CharField(max_length=512, blank=True)
image = models.ImageField(upload_to=document_images_path)
original_filename = models.CharField(max_length=1024, blank=True)
source = models.CharField(max_length=1024, blank=True)
bw_backend = models.CharField(max_length=128, default='kraken')
bw_image = models.ImageField(upload_to=document_images_path,
......@@ -290,6 +291,13 @@ class DocumentPart(OrderedModel):
@property
def segmented(self):
    """Whether this part has at least one line."""
    # exists() only checks that a row is present instead of counting
    # every line just to compare the total with zero.
    return self.lines.exists()
def make_external_id(self):
    """Return this part's external identifier: 'eSc_page_' followed by its pk."""
    page_id = 'eSc_page_%d' % self.pk
    return page_id
@property
def filename(self):
    """Name to show for this part's image.

    Uses the stored original filename when it is set; otherwise falls
    back to the last component of the image file path.
    """
    if self.original_filename:
        return self.original_filename
    return os.path.basename(self.image.path)
def calculate_progress(self):
if self.workflow_state < self.WORKFLOW_STATE_TRANSCRIBING:
......@@ -659,12 +667,12 @@ class Block(OrderedModel, models.Model):
document_part = models.ForeignKey(DocumentPart, on_delete=models.CASCADE,
related_name='blocks')
order_with_respect_to = 'document_part'
external_id = models.CharField(max_length=128, blank=True, null=True)
class Meta(OrderedModel.Meta):
pass
@property
def width(self):
return self.box[2] - self.box[0]
......@@ -674,7 +682,7 @@ class Block(OrderedModel, models.Model):
return self.box[3] - self.box[1]
def make_external_id(self):
return self.external_id or 'textblock_%d' % self.pk
return self.external_id or 'eSc_textblock_%d' % self.pk
class Line(OrderedModel): # Versioned,
......@@ -706,9 +714,9 @@ class Line(OrderedModel): # Versioned,
@property
def height(self):
return self.box[3] - self.box[1]
def make_external_id(self):
return self.external_id or 'line_%d' % self.pk
return self.external_id or 'eSc_line_%d' % self.pk
class Transcription(models.Model):
......
......@@ -38,8 +38,7 @@ $(document).ready(function() {
else $('a#next-part').hide();
if (data.image && data.image.uri) {
let imagename = data.image.uri.split('/').slice(-1)[0];
$('#part-name').html(data.title).attr('title', imagename);
$('#part-name').html(data.title).attr('title', '<'+data.filename+'>');
}
// set the 'image' tab btn to select the corresponding image
......@@ -59,7 +58,8 @@ $(document).ready(function() {
panels[key].reset();
}
});
// previous and next buttons
$('a#prev-part, a#next-part').click(function(ev) {
ev.preventDefault();
var pk = $(ev.target).parents('a').data('target');
......
......@@ -36,6 +36,7 @@ class partCard {
this.title = part.title;
this.typology = part.typology;
this.image = part.image;
this.filename = part.filename;
this.bw_image = part.bw_image;
this.workflow = part.workflow;
this.task_ids = {}; // helps preventing card status race conditions
......@@ -55,8 +56,7 @@ class partCard {
// fill template
$new.attr('id', $new.attr('id').replace('{pk}', this.pk));
this.updateThumbnail();
let filename = this.image.uri.split('/').splice(-1)[0];
$('img.card-img-top', $new).attr('title', this.title + '\n<' + filename +'>');
$('img.card-img-top', $new).attr('title', this.title + '\n<' + this.filename +'>');
$new.attr('draggable', true);
$('img', $new).attr('draggable', false);
......@@ -175,7 +175,7 @@ class partCard {
updateThumbnail() {
let uri, img = $('img.card-img-top', this.$element);
if (this.image.thumbnails['card'] != undefined) {
if (this.image.thumbnails && this.image.thumbnails['card'] != undefined) {
uri = this.image.thumbnails['card'];
} else {
uri = this.image.uri;
......
......@@ -4,15 +4,14 @@ class BinarizationPanel {
this.opened = opened | false;
this.$container = $('.img-container', this.$panel);
zoom.register(this.$container);
$('.img-container img', this.$panel).on('load', $.proxy(function() {
zoom.refresh();
}, this));
}
load(part) {
this.part = part;
$('.img-container img', this.$panel).on('load', $.proxy(function() {
zoom.refresh();
}, this));
if (this.part.bw_image) {
if (this.part.bw_image.thumbnails) {
$('.img-container img', this.$panel).attr('src', this.part.bw_image.thumbnails.large);
......@@ -42,7 +41,5 @@ class BinarizationPanel {
else this.open();
}
reset() {
zoom.refresh();
}
reset() {}
}
......@@ -4,15 +4,14 @@ class SourcePanel {
this.opened = opened | false;
this.$container = $('.img-container', this.$panel);
zoom.register(this.$container);
$('.img-container img', this.$panel).on('load', $.proxy(function(data) {
zoom.refresh();
}, this));
}
load(part) {
this.part = part;
$('.img-container img', this.$panel).on('load', $.proxy(function() {
zoom.refresh();
}, this));
if (this.part.image.thumbnails) {
$('.img-container img', this.$panel).attr('src', this.part.image.thumbnails.large);
} else {
......@@ -38,7 +37,5 @@ class SourcePanel {
else this.open();
}
reset() {
zoom.refresh();
}
reset() {}
}
......@@ -326,6 +326,7 @@ class TranscriptionPanel{
});
if (this.opened) this.open();
zoom.register(this.$container, true);
}
addLine(line, ratio) {
......@@ -350,6 +351,10 @@ class TranscriptionPanel{
}, this));
}, this);
getNext(1);
$('.zoom-container', this.$container).css({
width: this.part.image.size[0]*this.ratio,
height: this.part.image.size[1]*this.ratio});
}
load(part) {
......@@ -363,7 +368,6 @@ class TranscriptionPanel{
this.addLine(this.part.lines[i]);
}
this.loadTranscriptions();
zoom.register(this.$container, true);
}
open() {
......@@ -371,7 +375,7 @@ class TranscriptionPanel{
this.$panel.show();
Cookies.set('trans-panel-open', true);
}
close() {
this.opened = false;
this.$panel.hide();
......@@ -389,6 +393,10 @@ class TranscriptionPanel{
for (var i=0; i<this.lines.length; i++) {
this.lines[i].reset();
}
$('.zoom-container', this.$container).css({
width: this.part.image.size[0]*this.ratio,
height: this.part.image.size[1]*this.ratio});
}
}
}
from django.urls import reverse
from django.test import TestCase
from core.models import Line, LineTranscription
from core.models import Line, LineTranscription, Block
from core.tests.factory import CoreFactory
class DocumentExportTestCase(TestCase):
def setUp(self):
factory = CoreFactory()
self.trans = factory.make_transcription()
self.factory = CoreFactory()
self.trans = self.factory.make_transcription()
self.user = self.trans.document.owner # shortcut
for i in range(1, 3):
part = factory.make_part(name='part %d' % i,
part = self.factory.make_part(name='part %d' % i,
document=self.trans.document)
for j in range(1, 4):
l = Line.objects.create(document_part=part,
......@@ -27,8 +27,39 @@ class DocumentExportTestCase(TestCase):
resp = self.client.get(reverse('api:document-export',
kwargs={'pk': self.trans.document.pk})
+ '?transcription=' + str(self.trans.pk))
self.assertEqual(resp.content.decode(), "\nline 1:1\nline 1:2\nline 1:3\n-\nline 2:1\nline 2:2\nline 2:3\n")
self.assertEqual(''.join([c.decode() for c in resp.streaming_content]),
"line 1:1\nline 1:2\nline 1:3\nline 2:1\nline 2:2\nline 2:3\n")
def test_alto(self):
    """The ALTO export yields four chunks: start, one per part (2), end."""
    self.client.force_login(self.user)
    with self.assertNumQueries(16):  # should be 8 + 4*part
        export_url = (reverse('api:document-export',
                              kwargs={'pk': self.trans.document.pk})
                      + '?transcription=%d&as=alto' % self.trans.pk)
        resp = self.client.get(export_url)
    chunks = list(resp.streaming_content)
    self.assertEqual(len(chunks), 4)  # start + 2 part + end
def test_alto_qs_scaling(self):
    """Check the ALTO export query count after adding 16 more parts.

    Each added part gets one block holding three transcribed lines; the
    export must then run in exactly 80 queries and answer with a 200.
    """
    unit_box = (0,0,1,1)
    for part_no in range(4, 20):
        part = self.factory.make_part(name='part %d' % part_no,
                                      document=self.trans.document)
        block = Block.objects.create(document_part=part, box=unit_box)
        for line_no in range(1, 4):
            line = Line.objects.create(document_part=part,
                                       block=block,
                                       box=unit_box)
            LineTranscription.objects.create(
                line=line,
                transcription=self.trans,
                content='line %d:%d' % (part_no, line_no))

    self.client.force_login(self.user)
    with self.assertNumQueries(80):
        export_url = (reverse('api:document-export',
                              kwargs={'pk': self.trans.document.pk})
                      + '?transcription=%d&as=alto' % self.trans.pk)
        resp = self.client.get(export_url)
    self.assertEqual(resp.status_code, 200)
def test_invalid(self):
self.client.force_login(self.user)
resp = self.client.get(reverse('api:document-export',
......
import time
from lxml import etree
import os.path
import requests
from lxml import etree
import time
import uuid
from django.core.files.base import ContentFile
from django.db import transaction
......@@ -79,18 +80,36 @@ class XMLParser():
part.lines.all().delete()
for block in self.find(page, self.TAGS['block']):
# Note: don't use get_or_create to avoid a update query
attrs = {'document_part': part,
'external_id': block.get('ID')}
try:
b = Block.objects.get(**attrs)
except Block.DoesNotExist:
b = Block(**attrs)
b.box = self.block_bbox(block)
b.save()
id_ = block.get('ID')
if id_ and id_.startswith('eSc_dummyblock_'):
block_ = None
else:
try:
assert id_ and id_.startswith('eSc_textblock_')
attrs = {'pk': int(id_[len('eSc_textblock_'):])}
except (ValueError, AssertionError, TypeError):
attrs = {'document_part': part,
'external_id': id_}
try:
block_ = Block.objects.get(**attrs)
except Block.DoesNotExist:
block_ = Block(**attrs)
try:
block_.box = self.block_bbox(block)
except TypeError: # probably a dummy block
block = None
else:
block_.save()
for line in self.find(block, self.TAGS['line']):
attrs = {'document_part': part,
'block': b,
'external_id': line.get('ID')}
id_ = line.get('ID')
try:
assert id_ and id_.startswith('eSc_line_')
attrs = {'pk': int(id_[len('eSc_line_'):])}
except (ValueError, AssertionError, TypeError):
attrs = {'document_part': part,
'block': block_,
'external_id': line.get('ID')}
try:
l = Line.objects.get(**attrs)
except Line.DoesNotExist:
......@@ -230,8 +249,9 @@ class IIIFManifesParser():
)
if 'label' in resource:
part.name = resource['label']
name = '%d_%s' % (i, url.split('/')[-1])
part.image.save(name, ContentFile(r.content))
name = '%d_%s_%s' % (i, uuid.uuid4().hex[:5], url.split('/')[-1])
part.original_filename = name
part.image.save(name, ContentFile(r.content), save=False)
part.save()
yield part
time.sleep(0.1) # avoid being throttled
......
......@@ -42,7 +42,7 @@ class XmlImportTestCase(CoreFactoryTestCase):
self.assertEqual(self.part1.lines.first().box, [160, 771, 220, 799])
self.assertEqual(self.part1.lines.first().transcriptions.first().content, 'This is a test')
self.assertEqual(self.part2.blocks.count(), 0)
self.assertEqual(self.part2.lines.count(), 0)
self.assertEqual(self.part2.lines.count(), 0)
def test_alto_multi(self):
uri = reverse('api:document-imports', kwargs={'pk': self.document.pk})
......
......@@ -289,6 +289,10 @@ form.inline-form {
#binar-tools, #seg-tools, #trans-panel, #trans-tools {
display: none;
}
#img-panel, #binar-panel, #seg-panel, #trans-panel {
min-height: calc(100vh - 200px);
}
i.panel-icon {
vertical-align: top;
}
......@@ -303,6 +307,7 @@ i.panel-icon {
}
.zoom-container {
transform-origin: 0 0;
box-shadow: 1px 0 4px 1px rgba(0,0,0,0.1);
}
#zoom-range {
height: 100px;
......@@ -310,12 +315,15 @@ i.panel-icon {
#part-trans, #part-img {
overflow: hidden;
/* position: absolute; */
width: 100%;
height: 100%;
transform-origin:top left;
}
#trans-panel .zoom-container {
position: absolute;
}
.overlay {
top: 0;
left: 0;
......
......@@ -4,7 +4,7 @@ class WheelZoom {
constructor(options) {
this.options = options || {};
var defaults = {
factor: 0.2,
factor: 0.1,
min_scale: 1,
max_scale: null,
initial_scale: 1,
......@@ -29,13 +29,18 @@ class WheelZoom {
register(container, mirror) {
var target = container.children().first();
this.size = {w:target.width() * this.initial_scale, h:target.height() * this.initial_scale};
this.size = {w:target.width() * this.scale, h:target.height() * this.scale};
this.min_scale = this.options.min_scale || Math.min(
$(window).width() / (this.size.w * this.initial_scale) * 0.9,
$(window).height() / (this.size.h * this.initial_scale) * 0.9);
target.css({transformOrigin: '0 0', transition: 'transform 0.3s'});
if (mirror !== true) {
target.css({cursor: 'zoom-in'});
container.on("mousewheel DOMMouseScroll", $.proxy(this.scrolled, this));
container.on('mousedown', $.proxy(this.draggable, this));
} else {
container.addClass('mirror');
}
this.events.on('wheelzoom.reset', $.proxy(this.reset, this));
this.events.on('wheelzoom.refresh', $.proxy(this.refresh, this));
......@@ -100,17 +105,23 @@ class WheelZoom {
updateStyle() {
// Make sure the slide stays in its container area when zooming in/out
if (this.scale > 1) {
let container = this.getVisibleContainer();
if (this.size.w*this.scale > container.width()) {
if(this.pos.x > 0) { this.pos.x = 0; }
if(this.pos.x+this.size.w*this.scale < this.size.w) { this.pos.x = this.size.w - this.size.w*this.scale; }
if(this.pos.y > 0) { this.pos.y = 0; }
if(this.pos.y+this.size.h*this.scale < this.size.h) { this.pos.y = this.size.h - this.size.h*this.scale; }
if(this.pos.x+this.size.w*this.scale < container.width()) { this.pos.x = container.width() - this.size.w*this.scale; }
} else {
if(this.pos.x < 0) { this.pos.x = 0; }
if(this.pos.x+this.size.w*this.scale > this.size.w) { this.pos.x = -this.size.w*(this.scale-1); }
if(this.pos.x+this.size.w*this.scale > container.width()) { this.pos.x = container.width() - this.size.w*this.scale; }
}
if (this.size.h*this.scale > container.height()) {
if(this.pos.y > 0) { this.pos.y = 0; }
if(this.pos.y+this.size.h*this.scale < container.height()) { this.pos.y = container.height() - this.size.h*this.scale; }
} else {
if(this.pos.y < 0) { this.pos.y = 0; }
if(this.pos.y+this.size.h*this.scale > this.size.h) { this.pos.y = -this.size.h*(this.scale-1); }
if(this.pos.y+this.size.h*this.scale > container.height()) { this.pos.y = container.height() - this.size.h*this.scale; }
}
// apply scale first for transition effect
this.targets.forEach($.proxy(function(e, i) {
......@@ -122,12 +133,13 @@ class WheelZoom {
}
getVisibleContainer() {
return this.containers.find(function(e) { return e.is(':visible') && e.height() != 0;});
return this.containers.find(function(e) { return e.is(':visible:not(.mirror)') && e.height() != 0;});
}
refresh() {
let container = this.getVisibleContainer();
this.size = {w: container.width(), h: container.height()};
var target = container.children().first();
this.size = {w:target.width(), h:target.height()};
this.min_scale = this.options.min_scale || Math.min(
$(window).width() / (this.size.w * this.initial_scale) * 0.9,
$(window).height() / (this.size.h * this.initial_scale) * 0.9);
......
</Layout></alto>
<Page PHYSICAL_IMG_NR="{{ counter }}" WIDTH="{{ part.image.width }}" HEIGHT="{{ part.image.height }}" ID="{{part.make_external_id}}">
<PrintSpace HPOS="0" VPOS="0" WIDTH="{{ part.image.width }}" HEIGHT="{{ part.image.height }}">
{% regroup part.lines.all by block as part_blocks %}{% for block in part_blocks %}{% ifchanged block.grouper %}
<TextBlock {% if block.grouper %}HPOS="{{ block.grouper.box.0|default:0 }}" VPOS="{{ block.grouper.box.1|default:0 }}" WIDTH="{{ block.grouper.width|default:0 }}" HEIGHT="{{ block.grouper.height|default:0 }}" ID="{{ block.grouper.make_external_id }}"{% else %}ID="eSc_dummyblock_{{ block.list.0.pk }}"{% endif %}>{% endifchanged %}
{% for line in block.list %}<TextLine ID="{{ line.make_external_id }}" HPOS="{{ line.box.0 }}" VPOS="{{ line.box.1 }}" WIDTH="{{ line.width }}" HEIGHT="{{ line.height }}">
<String CONTENT="{{ line.transcription.0.content }}" HPOS="{{ line.box.0 }}" VPOS="{{ line.box.1 }}" WIDTH="{{ line.width }}" HEIGHT="{{ line.height }}"></String>
</TextLine>{% endfor %}
</TextBlock>{% endfor %}
{% for block in part.orphan_blocks %}<TextBlock HPOS="{{ block.box.0 }}" VPOS="{{ block.box.1 }}" WIDTH="{{ block.width }}" HEIGHT="{{ block.height }}" ID="{{ block.make_external_id }}">
</TextBlock>{% endfor %}
</PrintSpace>
</Page>
<?xml version="1.0" encoding="UTF-8"?>
<alto xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://www.loc.gov/standards/alto/ns-v4#"
xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v4# http://www.loc.gov/standards/alto/v4/alto-4-0.xsd">
<Layout>
......@@ -7,6 +7,6 @@
{% block body %}
<div class="jumbotron">
<h1>eScriptorium</h1>
<p>A project providing digital recognition of handwritten documents using machine learning techniques</p>
<p>{% trans "A project providing digital recognition of handwritten documents using machine learning techniques." %}</p>
</div>
{% endblock %}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment