Commit 6c6bdabf authored by Robin Tissot
Browse files

Merge branch 'develop'

parents 500b754f 7aea8244
......@@ -61,6 +61,7 @@ class DocumentSerializer(serializers.ModelSerializer):
class PartSerializer(serializers.ModelSerializer):
image = ImageField(thumbnails=['card', 'large'])
filename = serializers.CharField(read_only=True)
bw_image = ImageField(thumbnails=['large'], required=False)
workflow = serializers.JSONField(read_only=True)
transcription_progress = serializers.IntegerField(read_only=True)
......@@ -70,6 +71,7 @@ class PartSerializer(serializers.ModelSerializer):
fields = (
'pk',
'name',
'filename',
'title',
'typology',
'image',
......@@ -82,6 +84,7 @@ class PartSerializer(serializers.ModelSerializer):
def create(self, data):
document = Document.objects.get(pk=self.context["view"].kwargs["document_pk"])
data['document'] = document
data['original_filename'] = data['image'].name
obj = super().create(data)
# generate card thumbnail right away since we need it
get_thumbnailer(obj.image).get_thumbnail(settings.THUMBNAIL_ALIASES['']['card'])
......@@ -111,7 +114,7 @@ class LineTranscriptionSerializer(serializers.ModelSerializer):
class LineSerializer(serializers.ModelSerializer):
class Meta:
model = Line
fields = ('pk', 'order', 'block', 'box')
fields = ('pk', 'document_part', 'order', 'block', 'box')
def create(self, validated_data):
instance = super().create(validated_data)
......@@ -128,7 +131,7 @@ class DetailedLineSerializer(LineSerializer):
transcriptions = LineTranscriptionSerializer(many=True, required=False)
class Meta(LineSerializer.Meta):
fields = LineSerializer.Meta.fields + ('document_part', 'block', 'transcriptions',)
fields = LineSerializer.Meta.fields + ('transcriptions',)
class PartDetailSerializer(PartSerializer):
......
......@@ -188,7 +188,7 @@ class LineViewSetTestCase(CoreFactoryTestCase):
uri = reverse('api:line-list',
kwargs={'document_pk': self.part.document.pk,
'part_pk': self.part.pk})
with self.assertNumQueries(13):
with self.assertNumQueries(12):
resp = self.client.post(uri, {
'document_part': self.part.pk,
'box': '[10, 10, 50, 50]'
......
from django.db.models import Prefetch
from django.shortcuts import render
import itertools
from django.db.models import Prefetch, Count
from django.http import StreamingHttpResponse
from django.template import loader
from django.utils.text import slugify
from rest_framework.decorators import action
......@@ -36,7 +39,6 @@ class DocumentViewSet(ModelViewSet):
try:
form.process()
except ParseError as e:
return error("Incorrectly formated file, couldn't parse it.")
return Response({'status': 'ok'})
else:
......@@ -53,16 +55,7 @@ class DocumentViewSet(ModelViewSet):
return Response({'status': 'already canceled'}, status=400)
@action(detail=True, methods=['get'])
def export(self, request, pk=None):
def fetch_by_batch(queryset, start=0, size=200):
    """Yield the items of ``queryset`` one by one, slicing it in batches.

    The sequence is read ``size`` items at a time, beginning at index
    ``start``. Iteration stops after the first slice that comes back
    shorter than ``size``.
    """
    offset = start
    while True:
        batch = queryset[offset:offset + size]
        yield from batch
        if len(batch) < size:
            # A short (or empty) slice means there is nothing left to read.
            return
        offset += size
def export(self, request, pk=None):
format_ = request.GET.get('as', 'text')
try:
transcription = Transcription.objects.get(
......@@ -70,37 +63,53 @@ class DocumentViewSet(ModelViewSet):
except Transcription.DoesNotExist:
return Response({'error': "Object 'transcription' is required."}, status=status.HTTP_400_BAD_REQUEST)
self.object = self.get_object()
if format_ == 'text':
template = 'core/export/simple.txt'
content_type = 'text/plain'
extension = 'txt'
lines = (LineTranscription.objects.filter(transcription=transcription)
.order_by('line__document_part', 'line__document_part__order', 'line__order')
.select_related('line', 'line__document_part', 'line__block'))
context = {'lines': fetch_by_batch(lines)}
.order_by('line__document_part', 'line__document_part__order', 'line__order'))
response = StreamingHttpResponse(['%s\n' % line.content for line in lines],
content_type=content_type)
elif format_ == 'alto':
template = 'core/export/alto.xml'
content_type = 'text/xml'
extension = 'xml'
lines = (Line.objects
.filter(document_part__document=pk)
.select_related('document_part', 'block')
.order_by('document_part', 'block', 'order')
.prefetch_related(
Prefetch('transcriptions',
to_attr='transcription',
queryset=LineTranscription.objects.filter(transcription=transcription))))
context = {'lines': fetch_by_batch(lines)}
document = self.get_object()
part_pks = document.parts.values_list('pk', flat=True)
start = loader.get_template('core/export/alto_start.xml').render()
end = loader.get_template('core/export/alto_end.xml').render()
part_tmpl = loader.get_template('core/export/alto_part.xml')
response = StreamingHttpResponse(itertools.chain([start],
[part_tmpl.render({
'part': self.get_part_data(pk, transcription),
'counter': i})
for i, pk in enumerate(part_pks)],
[end]),
content_type=content_type)
else:
return Response({'error': 'Invalid format.'}, status=status.HTTP_400_BAD_REQUEST)
response = render(request, template,
context=context,
content_type=content_type)
response['Content-Disposition'] = 'attachment; filename="export-%s-%s.%s"' % (
slugify(self.object.name), datetime.now().isoformat()[:16], extension)
return response
def get_part_data(self, part_pk, transcription):
    """Load one document part with its export-related data prefetched.

    The returned ``DocumentPart`` carries:

    * ``orphan_blocks``: the part's blocks that have no line attached;
    * ``lines``: its lines, each with its block joined in and with a
      ``transcription`` attribute holding the line transcriptions that
      belong to ``transcription``.

    The lookup is done with ``.get(pk=part_pk)``, so a missing part raises
    ``DocumentPart.DoesNotExist``.
    """
    # Blocks whose line count is zero.
    empty_blocks = (Block.objects
                    .annotate(num_lines=Count("line"))
                    .filter(num_lines=0))

    # Restrict the prefetched line transcriptions to the requested one.
    wanted_transcriptions = LineTranscription.objects.filter(
        transcription=transcription)

    lines_with_text = (Line.objects.all()
                       .select_related('block')
                       .prefetch_related(
                           Prefetch('transcriptions',
                                    to_attr='transcription',
                                    queryset=wanted_transcriptions)))

    parts = DocumentPart.objects.prefetch_related(
        Prefetch('blocks',
                 to_attr='orphan_blocks',
                 queryset=empty_blocks),
        Prefetch('lines',
                 queryset=lines_with_text))
    return parts.get(pk=part_pk)
class PartViewSet(ModelViewSet):
......@@ -151,6 +160,12 @@ class LineViewSet(ModelViewSet):
queryset = Line.objects.all().select_related('block').prefetch_related('transcriptions__transcription')
serializer_class = DetailedLineSerializer
def get_serializer_class(self):
    """Choose the serializer class according to the current action.

    The 'retrieve' and 'list' actions use ``DetailedLineSerializer``;
    every other action uses ``LineSerializer``.
    """
    read_actions = ('retrieve', 'list')
    if self.action not in read_actions:
        # e.g. create
        return LineSerializer
    return DetailedLineSerializer
class LargeResultsSetPagination(PageNumberPagination):
page_size = 100
......
# Generated by Django 2.1.4 on 2019-05-13 13:20
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the ``original_filename`` field to the ``documentpart`` model."""

    # Runs after migration 0021 of the `core` app.
    dependencies = [
        ('core', '0021_auto_20190507_1304'),
    ]

    operations = [
        # Text field, may be left blank, at most 1024 characters.
        migrations.AddField(
            model_name='documentpart',
            name='original_filename',
            field=models.CharField(blank=True, max_length=1024),
        ),
    ]
......@@ -228,6 +228,7 @@ class DocumentPart(OrderedModel):
"""
name = models.CharField(max_length=512, blank=True)
image = models.ImageField(upload_to=document_images_path)
original_filename = models.CharField(max_length=1024, blank=True)
source = models.CharField(max_length=1024, blank=True)
bw_backend = models.CharField(max_length=128, default='kraken')
bw_image = models.ImageField(upload_to=document_images_path,
......@@ -290,6 +291,13 @@ class DocumentPart(OrderedModel):
@property
def segmented(self):
return self.lines.count() > 0
def make_external_id(self):
    """Return the identifier string for this part, derived from its primary key."""
    template = 'eSc_page_%d'
    return template % self.pk
@property
def filename(self):
    """Name of the file behind this part.

    Returns ``original_filename`` when it is set (non-empty); otherwise
    the last component of the image's path.
    """
    if self.original_filename:
        return self.original_filename
    # No recorded name: fall back to the file name taken from the image path.
    return os.path.basename(self.image.path)
def calculate_progress(self):
if self.workflow_state < self.WORKFLOW_STATE_TRANSCRIBING:
......@@ -659,12 +667,12 @@ class Block(OrderedModel, models.Model):
document_part = models.ForeignKey(DocumentPart, on_delete=models.CASCADE,
related_name='blocks')
order_with_respect_to = 'document_part'
external_id = models.CharField(max_length=128, blank=True, null=True)
class Meta(OrderedModel.Meta):
pass
@property
def width(self):
return self.box[2] - self.box[0]
......@@ -674,7 +682,7 @@ class Block(OrderedModel, models.Model):
return self.box[3] - self.box[1]
def make_external_id(self):
    """Return the external identifier of this block.

    A non-empty ``external_id`` stored on the block is returned as is.
    Otherwise the identifier is built from the primary key with the
    ``eSc_textblock_`` prefix; the XML import code looks for that exact
    prefix to recover the primary key of an existing block.
    """
    # Only one return: the stale un-prefixed variant ('textblock_%d') shadowed
    # this one and produced ids the importer could not map back to a pk.
    return self.external_id or 'eSc_textblock_%d' % self.pk
class Line(OrderedModel): # Versioned,
......@@ -706,9 +714,9 @@ class Line(OrderedModel): # Versioned,
@property
def height(self):
return self.box[3] - self.box[1]
def make_external_id(self):
    """Return the external identifier of this line.

    A non-empty ``external_id`` stored on the line is returned as is.
    Otherwise the identifier is built from the primary key with the
    ``eSc_line_`` prefix; the XML import code looks for that exact prefix
    to recover the primary key of an existing line.
    """
    # Only one return: the stale un-prefixed variant ('line_%d') shadowed
    # this one and produced ids the importer could not map back to a pk.
    return self.external_id or 'eSc_line_%d' % self.pk
class Transcription(models.Model):
......
......@@ -38,8 +38,7 @@ $(document).ready(function() {
else $('a#next-part').hide();
if (data.image && data.image.uri) {
let imagename = data.image.uri.split('/').slice(-1)[0];
$('#part-name').html(data.title).attr('title', imagename);
$('#part-name').html(data.title).attr('title', '<'+data.filename+'>');
}
// set the 'image' tab btn to select the corresponding image
......@@ -59,7 +58,8 @@ $(document).ready(function() {
panels[key].reset();
}
});
// previous and next buttons
$('a#prev-part, a#next-part').click(function(ev) {
ev.preventDefault();
var pk = $(ev.target).parents('a').data('target');
......
......@@ -36,6 +36,7 @@ class partCard {
this.title = part.title;
this.typology = part.typology;
this.image = part.image;
this.filename = part.filename;
this.bw_image = part.bw_image;
this.workflow = part.workflow;
this.task_ids = {}; // helps preventing card status race conditions
......@@ -55,8 +56,7 @@ class partCard {
// fill template
$new.attr('id', $new.attr('id').replace('{pk}', this.pk));
this.updateThumbnail();
let filename = this.image.uri.split('/').splice(-1)[0];
$('img.card-img-top', $new).attr('title', this.title + '\n<' + filename +'>');
$('img.card-img-top', $new).attr('title', this.title + '\n<' + this.filename +'>');
$new.attr('draggable', true);
$('img', $new).attr('draggable', false);
......@@ -175,7 +175,7 @@ class partCard {
updateThumbnail() {
let uri, img = $('img.card-img-top', this.$element);
if (this.image.thumbnails['card'] != undefined) {
if (this.image.thumbnails && this.image.thumbnails['card'] != undefined) {
uri = this.image.thumbnails['card'];
} else {
uri = this.image.uri;
......
......@@ -4,15 +4,14 @@ class BinarizationPanel {
this.opened = opened | false;
this.$container = $('.img-container', this.$panel);
zoom.register(this.$container);
$('.img-container img', this.$panel).on('load', $.proxy(function() {
zoom.refresh();
}, this));
}
load(part) {
this.part = part;
$('.img-container img', this.$panel).on('load', $.proxy(function() {
zoom.refresh();
}, this));
if (this.part.bw_image) {
if (this.part.bw_image.thumbnails) {
$('.img-container img', this.$panel).attr('src', this.part.bw_image.thumbnails.large);
......@@ -42,7 +41,5 @@ class BinarizationPanel {
else this.open();
}
reset() {
zoom.refresh();
}
reset() {}
}
......@@ -4,15 +4,14 @@ class SourcePanel {
this.opened = opened | false;
this.$container = $('.img-container', this.$panel);
zoom.register(this.$container);
$('.img-container img', this.$panel).on('load', $.proxy(function(data) {
zoom.refresh();
}, this));
}
load(part) {
this.part = part;
$('.img-container img', this.$panel).on('load', $.proxy(function() {
zoom.refresh();
}, this));
if (this.part.image.thumbnails) {
$('.img-container img', this.$panel).attr('src', this.part.image.thumbnails.large);
} else {
......@@ -38,7 +37,5 @@ class SourcePanel {
else this.open();
}
reset() {
zoom.refresh();
}
reset() {}
}
......@@ -326,6 +326,7 @@ class TranscriptionPanel{
});
if (this.opened) this.open();
zoom.register(this.$container, true);
}
addLine(line, ratio) {
......@@ -350,6 +351,10 @@ class TranscriptionPanel{
}, this));
}, this);
getNext(1);
$('.zoom-container', this.$container).css({
width: this.part.image.size[0]*this.ratio,
height: this.part.image.size[1]*this.ratio});
}
load(part) {
......@@ -363,7 +368,6 @@ class TranscriptionPanel{
this.addLine(this.part.lines[i]);
}
this.loadTranscriptions();
zoom.register(this.$container, true);
}
open() {
......@@ -371,7 +375,7 @@ class TranscriptionPanel{
this.$panel.show();
Cookies.set('trans-panel-open', true);
}
close() {
this.opened = false;
this.$panel.hide();
......@@ -389,6 +393,10 @@ class TranscriptionPanel{
for (var i=0; i<this.lines.length; i++) {
this.lines[i].reset();
}
$('.zoom-container', this.$container).css({
width: this.part.image.size[0]*this.ratio,
height: this.part.image.size[1]*this.ratio});
}
}
}
from django.urls import reverse
from django.test import TestCase
from core.models import Line, LineTranscription
from core.models import Line, LineTranscription, Block
from core.tests.factory import CoreFactory
class DocumentExportTestCase(TestCase):
def setUp(self):
factory = CoreFactory()
self.trans = factory.make_transcription()
self.factory = CoreFactory()
self.trans = self.factory.make_transcription()
self.user = self.trans.document.owner # shortcut
for i in range(1, 3):
part = factory.make_part(name='part %d' % i,
part = self.factory.make_part(name='part %d' % i,
document=self.trans.document)
for j in range(1, 4):
l = Line.objects.create(document_part=part,
......@@ -27,8 +27,39 @@ class DocumentExportTestCase(TestCase):
resp = self.client.get(reverse('api:document-export',
kwargs={'pk': self.trans.document.pk})
+ '?transcription=' + str(self.trans.pk))
self.assertEqual(resp.content.decode(), "\nline 1:1\nline 1:2\nline 1:3\n-\nline 2:1\nline 2:2\nline 2:3\n")
self.assertEqual(''.join([c.decode() for c in resp.streaming_content]),
"line 1:1\nline 1:2\nline 1:3\nline 2:1\nline 2:2\nline 2:3\n")
def test_alto(self):
    """Export the document as ALTO and check query count and chunk count."""
    self.client.force_login(self.user)
    with self.assertNumQueries(16):  # should be 8 + 4*part
        export_url = reverse('api:document-export',
                             kwargs={'pk': self.trans.document.pk})
        query_string = '?transcription=%d&as=alto' % self.trans.pk
        resp = self.client.get(export_url + query_string)
    chunks = list(resp.streaming_content)
    self.assertEqual(len(chunks), 4)  # start + 2 part + end
def test_alto_qs_scaling(self):
    """Check the ALTO export query count once many more parts exist.

    Adds 16 parts to the document, each with one block and three
    transcribed lines, then expects the export to run exactly 80 queries
    and answer with HTTP 200.
    """
    unit_box = (0, 0, 1, 1)
    for part_no in range(4, 20):
        part = self.factory.make_part(name='part %d' % part_no,
                                      document=self.trans.document)
        block = Block.objects.create(document_part=part, box=unit_box)
        for line_no in range(1, 4):
            line = Line.objects.create(document_part=part,
                                       block=block,
                                       box=unit_box)
            LineTranscription.objects.create(
                line=line,
                transcription=self.trans,
                content='line %d:%d' % (part_no, line_no))

    self.client.force_login(self.user)
    with self.assertNumQueries(80):
        export_url = reverse('api:document-export',
                             kwargs={'pk': self.trans.document.pk})
        query_string = '?transcription=%d&as=alto' % self.trans.pk
        resp = self.client.get(export_url + query_string)
    self.assertEqual(resp.status_code, 200)
def test_invalid(self):
self.client.force_login(self.user)
resp = self.client.get(reverse('api:document-export',
......
import time
from lxml import etree
import os.path
import requests
from lxml import etree
import time
import uuid
from django.core.files.base import ContentFile
from django.db import transaction
......@@ -79,18 +80,36 @@ class XMLParser():
part.lines.all().delete()
for block in self.find(page, self.TAGS['block']):
# Note: don't use get_or_create to avoid a update query
attrs = {'document_part': part,
'external_id': block.get('ID')}
try:
b = Block.objects.get(**attrs)
except Block.DoesNotExist:
b = Block(**attrs)
b.box = self.block_bbox(block)
b.save()
id_ = block.get('ID')
if id_ and id_.startswith('eSc_dummyblock_'):
block_ = None
else:
try:
assert id_ and id_.startswith('eSc_textblock_')
attrs = {'pk': int(id_[len('eSc_textblock_'):])}
except (ValueError, AssertionError, TypeError):
attrs = {'document_part': part,
'external_id': id_}
try:
block_ = Block.objects.get(**attrs)
except Block.DoesNotExist:
block_ = Block(**attrs)
try:
block_.box = self.block_bbox(block)
except TypeError: # probably a dummy block
block = None
else:
block_.save()
for line in self.find(block, self.TAGS['line']):
attrs = {'document_part': part,
'block': b,
'external_id': line.get('ID')}
id_ = line.get('ID')
try:
assert id_ and id_.startswith('eSc_line_')
attrs = {'pk': int(id_[len('eSc_line_'):])}
except (ValueError, AssertionError, TypeError):
attrs = {'document_part': part,
'block': block_,
'external_id': line.get('ID')}
try:
l = Line.objects.get(**attrs)
except Line.DoesNotExist:
......@@ -230,8 +249,9 @@ class IIIFManifesParser():
)
if 'label' in resource:
part.name = resource['label']
name = '%d_%s' % (i, url.split('/')[-1])
part.image.save(name, ContentFile(r.content))
name = '%d_%s_%s' % (i, uuid.uuid4().hex[:5], url.split('/')[-1])
part.original_filename = name
part.image.save(name, ContentFile(r.content), save=False)
part.save()
yield part
time.sleep(0.1) # avoid being throttled
......
......@@ -42,7 +42,7 @@ class XmlImportTestCase(CoreFactoryTestCase):
self.assertEqual(self.part1.lines.first().box, [160, 771, 220, 799])
self.assertEqual(self.part1.lines.first().transcriptions.first().content, 'This is a test')
self.assertEqual(self.part2.blocks.count(), 0)
self.assertEqual(self.part2.lines.count(), 0)
self.assertEqual(self.part2.lines.count(), 0)
def test_alto_multi(self):
uri = reverse('api:document-imports', kwargs={'pk': self.document.pk})
......
......@@ -289,6 +289,10 @@ form.inline-form {
#binar-tools, #seg-tools, #trans-panel, #trans-tools {
display: none;
}
#img-panel, #binar-panel, #seg-panel, #trans-panel {
min-height: calc(100vh - 200px);
}
i.panel-icon {
vertical-align: top;
}
......@@ -303,6 +307,7 @@ i.panel-icon {
}
.zoom-container {
transform-origin: 0 0;
box-shadow: 1px 0 4px 1px rgba(0,0,0,0.1);
}
#zoom-range {
height: 100px;
......@@ -310,12 +315,15 @@ i.panel-icon {
#part-trans, #part-img {
overflow: hidden;
/* position: absolute; */
width: 100%;
height: 100%;
transform-origin:top left;
}
#trans-panel .zoom-container {
position: absolute;
}
.overlay {
top: 0;
left: 0;
......
......@@ -4,7 +4,7 @@ class WheelZoom {
constructor(options) {
this.options = options || {};
var defaults = {
factor: 0.2,
factor: 0.1,
min_scale: 1,
max_scale: null,
initial_scale: 1,
......@@ -29,13 +29,18 @@ class WheelZoom {
register(container, mirror) {
var target = container.children().first();
this.size = {w:target.width() * this.initial_scale, h:target.height() * this.initial_scale};
this.size = {w:target.width() * this.scale, h:target.height() * this.scale};
this.min_scale = this.options.min_scale || Math.min(
$(window).width() / (this.size.w * this.initial_scale) * 0.9,
$(window).height() / (this.size.h * this.initial_scale) * 0.9);