Commit 21493b99 authored by Bruno Guillaume

version 0.8.0: search box

parent a04a152e
dist
dep2pict_gui.egg-info
src/__pycache__
local
......@@ -3,7 +3,6 @@ doc:
@echo "make upload_test: upload on the test.pypi website"
@echo "make upload_prod: upload on the prod pypi website"
build:
rm -rf dist
python3 setup.py sdist
......
......@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
setuptools.setup(
name='dep2pict-gui',
version='0.7.0',
version='0.8.0',
packages=setuptools.find_packages(),
license='LICENSE/Licence_CeCILL_V2-en.txt',
description="A Qt interface for the dep2pict software",
......
......@@ -2,44 +2,43 @@ import tempfile
import subprocess
def load_conll (filename):
reset()
sent_id_list = []
sentence_offset_dict = dict()
fo = open(filename, "r", encoding='utf8')
lines = fo.read().splitlines()
fo.close()
sentences = []
offsets = []
sent=None
cpt=0
current_linenum=0
next_sent_linenum=0
for l in lines:
if l != '':
if l == '':
if sent != None and (not all(l[0] == "#" for l in sent)): # end
sent_id = get_sentid (sent)
if sent_id in sentence_offset_dict:
raise ValueError('Duplicate sent_id "%s" (line %d)' % (sent_id, current_linenum))
else:
sentence_offset_dict[sent_id] = (sent, next_sent_linenum)
sent_id_list = sent_id_list + [sent_id]
sent = None
else: # empty line
if sent == None:
sent = [l]
next_sent_linenum=cpt
next_sent_linenum=current_linenum
else:
sent = sent+[l]
else: # empty line
if sent != None and (not all(l[0] == "#" for l in sent)): # end
sentences=sentences+[sent]
offsets = offsets+[next_sent_linenum]
sent = None
cpt += 1
# needed if there is no newline at the end of the conll file
if sent != None and (not all(l[0] == "#" for l in sent)): # end
sentences=sentences+[sent]
offsets = offsets+[next_sent_linenum]
return (sentences, offsets)
current_linenum += 1
return (sent_id_list, sentence_offset_dict)
cpt = 0
next_automatic_sent_id = 0
def reset():
global cpt
cpt=0
global next_automatic_sent_id
next_automatic_sent_id=0
def new_id():
global cpt
cpt+=1
return ("%05d" % cpt)
global next_automatic_sent_id
next_automatic_sent_id+=1
return ("%05d" % next_automatic_sent_id)
def get_sentid(sentence):
try:
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment