Commit 37206020 authored by Robin Tissot's avatar Robin Tissot
Browse files

Merge branch 'develop'

parents 4a027484 b41b8421
......@@ -4,17 +4,37 @@ import uuid
from django.db import migrations
def batch_qs(qs, batch_size=1000):
    """
    Yield a (start, end, total, queryset) tuple for each batch of ``qs``.

    ``total`` is read once, via ``qs.count()``, before the first batch is
    produced. Each batch is the slice ``qs[start:end]``; ``start`` is
    0-based and ``end`` is exclusive.

    NOTE(review): the offsets are fixed up front. If rows drop out of ``qs``
    while it is being consumed (e.g. the loop body updates the very field
    ``qs`` filters on), later slices will presumably skip rows -- snapshot
    the primary keys first in that situation.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.

    Usage:
        # Make sure to order your queryset
        article_qs = Article.objects.order_by('id')
        for start, end, total, qs in batch_qs(article_qs):
            print("Now processing %s - %s of %s" % (start + 1, end, total))
            for article in qs:
                print(article.body)
    """
    # A negative step would make range() empty and silently process nothing.
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    total = qs.count()
    for start in range(0, total, batch_size):
        # Clamp the final batch so it never runs past the counted total.
        end = min(start + batch_size, total)
        yield (start, end, total, qs[start:end])
def _fill_external_ids(model, prefix, batch_size=1000):
    """Assign a generated external_id to every ``model`` row that has none.

    The id is ``prefix`` followed by the first 8 characters of a random
    UUID4 string.
    """
    # Snapshot the primary keys before modifying anything. Saving a row
    # removes it from ``filter(external_id=None)``, so walking that filtered
    # queryset by offset would skip rows that still need an id.
    pending_pks = list(
        model.objects.filter(external_id=None)
        .order_by('pk')
        .values_list('pk', flat=True)
    )
    for start in range(0, len(pending_pks), batch_size):
        chunk = pending_pks[start:start + batch_size]
        # Load one chunk at a time to keep memory bounded on large tables.
        for obj in model.objects.filter(pk__in=chunk):
            obj.external_id = '%s%s' % (prefix, str(uuid.uuid4())[:8])
            obj.save()


def forward(apps, se):
    """Backfill external_id on core.Block and core.Line rows where it is NULL.

    Args:
        apps: registry used to look up the 'core' models via ``get_model``.
        se: second migration callback argument; not used here.
    """
    Block = apps.get_model('core', 'Block')
    Line = apps.get_model('core', 'Line')
    _fill_external_ids(Block, 'eSc_textblock_')
    _fill_external_ids(Line, 'eSc_line_')
def backward(apps, se):
......
......@@ -8,6 +8,7 @@ import subprocess
import uuid
from PIL import Image
from datetime import datetime
from shapely.geometry import Polygon, LineString
from django.db import models, transaction
from django.db.models import Q, Prefetch
......@@ -639,24 +640,33 @@ class DocumentPart(OrderedModel):
res = blla.segment(im, **options)
regs = []
if steps in ['regions', 'both']:
block_types = {t.name: t for t in self.document.valid_block_types.all()}
for region_type, regions in res['regions'].items():
for region in regions:
Block.objects.create(
block = Block.objects.create(
document_part=self,
typology=block_types.get(region_type),
box=region)
regs.append(block)
if steps in ['lines', 'both']:
line_types = {t.name: t for t in self.document.valid_line_types.all()}
for line in res['lines']:
mask = line['boundary'] if line['boundary'] is not None else None
baseline = line['baseline']
# check whether the center of the line is contained in one of the regions
# (pick the first one that matches)
center = LineString(baseline).interpolate(0.5, normalized=True)
region = next((r for r in regs if Polygon(r.box).contains(center)), None)
Line.objects.create(
document_part=self,
typology=line_types.get(line['script']),
# region=region_map.get(line['region']),
baseline=line['baseline'],
block=region,
baseline=baseline,
mask=mask)
im.close()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment