diff --git a/querybuilder/classes/atoms.py b/querybuilder/classes/atoms.py index 3ee2fe035a14c2f851dfcf5e75e0a86e5b0c8b2e..9888eca9750b48d4e2906fc0bbd4fc1e6f8e42eb 100644 --- a/querybuilder/classes/atoms.py +++ b/querybuilder/classes/atoms.py @@ -93,6 +93,7 @@ class Atom(object): return {k: getattr(self, k) for k in slots} def subtokenize(self, tokenizer, scoped=False): + self = tokenizer.transform(self) if scoped and self._scopable: return tokenizer.scope_it(self.subtokenize(tokenizer)) return tokenizer(self, **self._get_subtokenize_kwargs(tokenizer)) diff --git a/querybuilder/drivers/sql/tokenizer.py b/querybuilder/drivers/sql/tokenizer.py index d0b23ad8154a0f2f279015d618c4f1bca3a18f85..08a08443abb2ad8b2c7019151d65944c404bb0e1 100644 --- a/querybuilder/drivers/sql/tokenizer.py +++ b/querybuilder/drivers/sql/tokenizer.py @@ -973,15 +973,29 @@ class Tokenizer: @__call__.register(relations.Aliased) def _( - self, obj: relations.Aliased, /, *, subrelation: TkTree, name: TkTree + self, + obj: relations.Aliased, + /, + *, + subrelation: TkTree, + name: TkSeq, + columns: Tuple[TkTree], ) -> TkTree: + if columns: + tcomma = TkStr(qbtoken.Punctuation, ",").to_seq() + cols_tree = self.tokenize_list(tcomma, columns, accolate=True) + name_tree = name + TkStr(qbtoken.Punctuation, "(").to_seq() + name_tree = self.scope_it(cols_tree, noindent=True, left=name_tree) + else: + name_tree = name + as_ = self.tokenize_keyword("AS") if isinstance(subrelation, TkSeq): - return (subrelation, as_, name) + return (subrelation, as_, name_tree) subrelation = cast( tuple[TkTree], subrelation ) # TODO: why does mypy fail to infer type without this cast? - return (*subrelation[:-1], (subrelation[-1], (as_, name))) + return (*subrelation[:-1], (subrelation[-1], (as_, name_tree))) @__call__.register(relations.CartesianProduct) def _( @@ -1315,4 +1329,12 @@ class Tokenizer: return transform(key_placeholder, tokens) + ####################### + # ATOM TRANSFORMATION # + ####################### + + @singledispatchmethod + def transform(self, obj: atoms.Atom) -> atoms.Atom: + return obj + del _ diff --git a/querybuilder/drivers/sqlite/tokenizer.py b/querybuilder/drivers/sqlite/tokenizer.py index 98f05b56a0d944a9fd8030b57c7f6298f2cbc850..b07f221708aff9102048b5d6424782c552833565 100644 --- a/querybuilder/drivers/sqlite/tokenizer.py +++ b/querybuilder/drivers/sqlite/tokenizer.py @@ -3,12 +3,16 @@ import re from uuid import UUID import warnings from functools import singledispatchmethod -from typing import Callable, List, Optional +from typing import Callable, List, Optional, cast from querybuilder.utils.decorators import TypeDispatch from querybuilder.formatting.token import Token from querybuilder.formatting.tokentree import TkInd, TkTree, TkSeq, TkStr from querybuilder.drivers.sql.tokenizer import Tokenizer as sqlTokenizer, name_and_defer from querybuilder.queries.algebra.clauses import Limit, OrderColumn +from querybuilder.classes.atoms import Atom +from querybuilder.queries.dql import Select +import querybuilder.queries.algebra.relations as qbrelations +import querybuilder.queries.algebra.columns as qbcolumns import querybuilder as qb @@ -197,3 +201,34 @@ class Tokenizer(sqlTokenizer): return TkStr(Token.Name.Builtin, "TEXT").to_seq() del _ + + ####################### + # ATOM TRANSFORMATION # + ####################### + + @singledispatchmethod + def transform(self, obj: Atom) -> Atom: + return super().transform(obj) + + @transform.register(qbrelations.Aliased) + def _t(self, obj: qbrelations.Aliased) -> qbrelations.Relation: + if not obj.column_aliases: + return obj + + left_aliases = {i: name for i, name in enumerate(obj._get_column_names())} + left_columns = tuple(qbcolumns.Null() for i in obj.subrelation.columns) + naming_relation = Select( + left_columns, where=qbcolumns.False_(), aliases=left_aliases + ) + + subrel = obj.subrelation + if not isinstance(subrel, qbrelations.Fromable): + subrel = subrel.alias(obj.name) + + subrel = cast(qbrelations.Fromable, subrel) + + rel = naming_relation.union_all(subrel.select((qbcolumns.Star(),))) + + return rel.alias(obj.name) + + del _t diff --git a/querybuilder/queries/algebra/relations.py b/querybuilder/queries/algebra/relations.py index 9813bc7d85fac3bdbf1bc7e0d6a41d3a51735f8d..ce42013ff428ca3948ad00a8c05b4629c4e171ed 100644 --- a/querybuilder/queries/algebra/relations.py +++ b/querybuilder/queries/algebra/relations.py @@ -10,6 +10,7 @@ from typing import ( Any, Sequence, Self, + Dict, ) from typing_extensions import ClassVar import abc, functools, itertools, operator @@ -68,8 +69,12 @@ class Relation(atoms.Atom): def arity(self): return len(self.columns) - def alias(self, name): - return Aliased(self, name) + def alias( + self, + name: str, + column_aliases: Mapping[int, str] | Iterable[tuple[int, str]] = (), + ): + return Aliased(self, name, column_aliases) def _substitute(self, substitutions: Mapping) -> Self: columns = tuple(c.substitute(substitutions) for c in self.columns) @@ -247,20 +252,63 @@ class Named(Prenamed, Fromable): # Actual classes class Aliased(Named): - __slots__: tuple[str, ...] = ("subrelation",) + __slots__: tuple[str, ...] = ("subrelation", "column_aliases") - def __init__(self, subrelation, name, **kwargs): + def __init__( + self, + subrelation: Relation, + name: str, + column_aliases: Mapping[int, str] | Iterable[tuple[int, str]] = (), + **kwargs, + ): self.subrelation = subrelation - super().__init__(name, columns=subrelation.columns, **kwargs) + self.column_aliases: Mapping[int, str] = qbstores.Frozenmap(column_aliases) + + if self.column_aliases: + for i, c in enumerate(self.subrelation.columns): + if not i in self.column_aliases and not isinstance(c, qbcolumns.Named): + raise ValueError("Missing alias for an unnamed column") + + super().__init__(name, **kwargs) + + def _get_column_names(self): + return tuple( + self.column_aliases[i] if i in self.column_aliases else c.name + for i, c in enumerate(self.subrelation.columns) + ) + + @property + def columns(self): + return self._column_store_factory( + ( + qbcolumns.name_column(c, self.column_aliases[i]) + if i in self.column_aliases + else c + for i, c in enumerate(self.subrelation.columns) + ) + ) + + def _init_columns(self, columns): + assert not columns def alias(self, name): return self.buildfrom(self, name=name) def _get_subtokenize_kwargs(self, tokenizer): - return dict( - subrelation=self.subrelation.subtokenize(tokenizer, scoped=True), - **super()._get_subtokenize_kwargs(tokenizer), - ) + kwargs = super()._get_subtokenize_kwargs(tokenizer) + kwargs["subrelation"] = self.subrelation.subtokenize(tokenizer, scoped=True) + if self.column_aliases: + CW = querybuilder.queries.algebra.clauses.AliasedColumn + kwargs["columns"] = tuple( + map( + lambda c: CW(qbcolumns.unqualify(c)).subtokenize(tokenizer), + self.columns, + ) + ) + else: + kwargs["columns"] = () + + return kwargs def accept(self, accumulator): accumulator = self.subrelation.accept(accumulator) @@ -277,6 +325,12 @@ class Aliased(Named): return self.buildfrom(self, subrelation=subrel) + def __getstate__(self): + state = super().__getstate__() + state["columns"] = None # maybe a little ugly? + + return state + class With(Named): __slots__ = ("subrelation", "materialized") diff --git a/querybuilder/queries/dql.py b/querybuilder/queries/dql.py index 50b1050ece2f380be68f3f497c1188cab216479d..30fbab56df233b39b5e48fb1c67c8261af524116 100644 --- a/querybuilder/queries/dql.py +++ b/querybuilder/queries/dql.py @@ -524,6 +524,7 @@ class SetCombination(DQLQuery): """ __slots__ = ("combinator", "all", "subrelations") + _scopable = True def __init__( self, diff --git a/querybuilder/tests/classes/test_atoms.py b/querybuilder/tests/classes/test_atoms.py index ebe9e76623dfb3a961ccceaf644f446505b20ed1..39251c2657c7ae83fb04f79faa618d4206342581 100644 --- a/querybuilder/tests/classes/test_atoms.py +++ b/querybuilder/tests/classes/test_atoms.py @@ -20,6 +20,7 @@ class TestAtom: def mock_tokenizer(self, mocker, mock_tokens, mock_scoped_tokens): tk = Mock(name="tokenizer", return_value=mock_tokens) tk.scope_it = Mock(name="scope_it", return_value=mock_scoped_tokens) + tk.transform = lambda x: x return tk diff --git a/querybuilder/tests/drivers/sql/test_tokenizer.py b/querybuilder/tests/drivers/sql/test_tokenizer.py index cf13a39b24697784f9453e50be8a3708b4628136..71c4b48a2f5f53f8b7f207514e9b0c0b15c7415a 100644 --- a/querybuilder/tests/drivers/sql/test_tokenizer.py +++ b/querybuilder/tests/drivers/sql/test_tokenizer.py @@ -3,6 +3,8 @@ import querybuilder as qb from querybuilder.formatting import token as qbtoken from querybuilder.formatting.tokentree import TkStr, TkSeq, TkTree, TkInd import querybuilder.utils.constants as qbconstants +import querybuilder.queries.algebra.columns as qbcolumns +import querybuilder.queries.algebra.relations as qbrelations class TestSQLTokenizer: @@ -1469,3 +1471,136 @@ class TestSQLTokenizer: result = self.tk(self.get_empty_instance(qb.queries.algebra.columns.Star)) expected = TkStr(qbtoken.Operator, "*").to_seq() + assert expected == result + + def test_aliased_relation_with_tkseq_without_column_aliases(self): + name = self.get_dummy_tkseq(value="bar") + subrelation = self.get_dummy_tkseq(value="foo") + + result = self.tk( + self.get_empty_instance(qb.queries.algebra.relations.Aliased), + subrelation=subrelation, + name=name, + columns=(), + ) + + expected = (subrelation, TkStr(qbtoken.Token.Keyword, "AS").to_seq(), name) + + assert expected == result + + def test_aliased_relation_with_tkseq_and_column_aliases(self): + name = self.get_dummy_tkseq(value="bar") + subrelation = self.get_dummy_tkseq(value="foo") + columns = self.get_dummy_columns(3) + + result = self.tk( + self.get_empty_instance(qb.queries.algebra.relations.Aliased), + subrelation=subrelation, + name=name, + columns=columns, + ) + + sepseq = TkStr(qbtoken.Token.Punctuation, ",").to_seq() + expected = ( + subrelation, + TkStr(qbtoken.Token.Keyword, "AS").to_seq(), + ( + name + TkStr(qbtoken.Punctuation, "(").to_seq(), + ( + columns[0] + sepseq, + columns[1] + sepseq, + columns[2], + ), + TkStr(qbtoken.Punctuation, ")").to_seq(), + ), + ) + + assert expected == result + + def test_aliased_relation_with_tktree_without_column_aliases(self): + name = self.get_dummy_tkseq(value="bar") + subrelation_seq = self.get_dummy_tkseq(value="foo") + + subrelation = ( + TkStr(qbtoken.Punctuation, "(").to_seq(), + (TkInd(), subrelation_seq), + TkStr(qbtoken.Punctuation, ")").to_seq(), + ) + + result = self.tk( + self.get_empty_instance(qb.queries.algebra.relations.Aliased), + subrelation=subrelation, + name=name, + columns=(), + ) + + expected = ( + TkStr(qbtoken.Punctuation, "(").to_seq(), + (TkInd(), subrelation_seq), + ( + TkStr(qbtoken.Punctuation, ")").to_seq(), + (TkStr(qbtoken.Token.Keyword, "AS").to_seq(), name), + ), + ) + + assert expected == result + + def test_aliased_relation_with_tktree_and_column_aliases(self): + name = self.get_dummy_tkseq(value="bar") + subrelation_seq = self.get_dummy_tkseq(value="foo") + columns = self.get_dummy_columns(3) + + subrelation = ( + TkStr(qbtoken.Punctuation, "(").to_seq(), + (TkInd(), subrelation_seq), + TkStr(qbtoken.Punctuation, ")").to_seq(), + ) + + result = self.tk( + self.get_empty_instance(qb.queries.algebra.relations.Aliased), + subrelation=subrelation, + name=name, + columns=columns, + ) + + sepseq = TkStr(qbtoken.Token.Punctuation, ",").to_seq() + expected = ( + TkStr(qbtoken.Punctuation, "(").to_seq(), + (TkInd(), subrelation_seq), + ( + TkStr(qbtoken.Punctuation, ")").to_seq(), + ( + TkStr(qbtoken.Token.Keyword, "AS").to_seq(), + ( + name + TkStr(qbtoken.Punctuation, "(").to_seq(), + ( + columns[0] + sepseq, + columns[1] + sepseq, + columns[2], + ), + TkStr(qbtoken.Punctuation, ")").to_seq(), + ), + ), + ), + ) + + assert expected == result + + def test_transform_aliased_relation_without_column_aliases(self): + column_names = ("x", "b", "z") + columns = tuple(qbcolumns.Named(int, name) for name in column_names) + + subrel = qbrelations.Named("foo", columns=columns) + rel = qbrelations.Aliased(subrel, "bar") + + assert rel == self.tk.transform(rel) + + def test_transform_aliased_relation_with_column_aliases(self): + column_names = ("x", "b", "z") + columns = tuple(qbcolumns.Named(int, name) for name in column_names) + + subrel = qbrelations.Named("foo", columns=columns) + + rel = qbrelations.Aliased(subrel, "bar", column_aliases={1: "col"}) + + assert rel == self.tk.transform(rel) diff --git a/querybuilder/tests/drivers/sqlite/test_sqlite_tokenizer.py b/querybuilder/tests/drivers/sqlite/test_sqlite_tokenizer.py index 11c737d90ff1b8490f1e1f3816d8f4d8e8fff252..6f1e6b25359dbc3561e547ddaa15a7b0e8541146 100644 --- a/querybuilder/tests/drivers/sqlite/test_sqlite_tokenizer.py +++ b/querybuilder/tests/drivers/sqlite/test_sqlite_tokenizer.py @@ -4,6 +4,10 @@ from querybuilder.formatting import token as qbtoken from querybuilder.formatting.tokentree import TkStr, TkSeq, TkTree, TkInd import querybuilder.utils.constants as qbconstants import querybuilder.tests.drivers.sql.test_tokenizer as parent_suite +import querybuilder.queries.algebra.relations as qbrelations +import querybuilder.queries.algebra.columns as qbcolumns +import querybuilder.queries.dql as qbdql + # can't import TestSQLTokenizer as it would rerun the SQL tests @@ -80,3 +84,51 @@ class TestSqliteTokenizer(parent_suite.TestSQLTokenizer): ) assert expected == result + + def test_transform_aliased_named_relation_with_column_aliases(self): + column_names = ("x", "b", "z") + columns = tuple(qbcolumns.Named(int, name) for name in column_names) + + subrel = qbrelations.Named("foo", columns=columns) + + relation_alias = "bar" + column_aliases = {1: "y"} + + rel = qbrelations.Aliased(subrel, relation_alias, column_aliases) + result = self.tk.transform(rel) + + assert relation_alias == result.name + + result_subrel = result.subrelation.right.from_ + assert subrel == result_subrel + assert tuple(subrel.columns) == tuple(result.subrelation.right.columns) + + expected_column_names = ("x", "y", "z") + naming_relation = result.subrelation.left + assert qbcolumns.False_() == naming_relation.where + for i, c in enumerate(naming_relation.columns): + assert expected_column_names[i] == c.name + + def test_transform_aliased_unnamed_relation_with_column_aliases(self): + columns = tuple(qbcolumns.make_column(i) for i in range(3)) + + subrel = qbdql.Select(columns) + + relation_alias = "bar" + column_aliases = {0: "x", 1: "y", 2: "z"} + + rel = subrel.alias(relation_alias, column_aliases=column_aliases) + result = self.tk.transform(rel) + + assert relation_alias == result.name + + result_subrel = result.subrelation.right.from_ + assert subrel == result_subrel.subrelation + assert relation_alias == result_subrel.name + assert tuple(subrel.columns) == tuple(result.subrelation.right.columns) + + expected_column_names = ("x", "y", "z") + naming_relation = result.subrelation.left + assert qbcolumns.False_() == naming_relation.where + for i, c in enumerate(naming_relation.columns): + assert expected_column_names[i] == c.name diff --git a/querybuilder/tests/queries/algebra/test_relations.py b/querybuilder/tests/queries/algebra/test_relations.py index 0c6409ad5b179e712c899d325e6855976781190d..0a530c062e5d15430c21b679a6db29255d5c3f52 100644 --- a/querybuilder/tests/queries/algebra/test_relations.py +++ b/querybuilder/tests/queries/algebra/test_relations.py @@ -1,10 +1,14 @@ import pytest +from pytest_mock import MockerFixture +from mock import Mock + import querybuilder import querybuilder.queries.dml as qbdml from querybuilder.schemas.helper import table, ColumnSpec import querybuilder.queries.algebra.columns as qbcolumns import querybuilder.queries.algebra.relations as qbrelations import querybuilder.schemas.constraints as qbconstraints +from querybuilder.tests.utils import create_subtokenizable_mock class TestRelation: @@ -273,6 +277,128 @@ class TestAliased: assert post_subrel == post_rel.subrelation + def test_columns_without_aliases(self): + col_1 = qbcolumns.make_column(1) + col_2 = qbcolumns.make_column(2) + + columns = (col_1, col_2) + + subrel = qbrelations.Named("foo", columns=columns) + + rel = qbrelations.Aliased(subrel, "bar") + + assert columns == rel.columns + + @pytest.mark.parametrize("aliases", [{0: "a", 2: "c"}, ((2, "c"), (0, "a"))]) + def test_columns_aliases(self, aliases): + columns = ( + qbcolumns.Named(int, "x"), + qbcolumns.Named(int, "b"), + qbcolumns.make_column(42), + ) + + subrel = qbrelations.Named("foo", columns=columns) + + rel = qbrelations.Aliased(subrel, "bar", aliases) + + expected_columns = ( + qbcolumns.Named(int, "a"), + subrel.columns[1], + qbcolumns.Named(int, "c"), + ) + + assert expected_columns == rel.columns + + def test_unnamed_columns_must_be_aliased(self): + columns = ( + qbcolumns.Named(int, "x"), + qbcolumns.Named(int, "b"), + qbcolumns.make_column(42), + ) + + subrel = qbrelations.Named("foo", columns=columns) + + aliases = {0: "a"} + + with pytest.raises(ValueError): + rel = qbrelations.Aliased(subrel, "bar", aliases) + + def test_get_subtokenize_kwargs_with_aliases(self, mocker): + subrelation_tok = "subrelation foo" + name_tok = "bar" + column_tok = "column" + + name = "foo" + + subrelation = Mock() + subrelation.subtokenize = Mock(return_value=subrelation_tok) + subrelation.columns = ( + qbcolumns.Named(int, "col_0"), + qbcolumns.Named(int, "col_1"), + ) + + tokenizer = Mock() + tokenizer.tokenize_name = Mock(return_value=name_tok) + + create_subtokenizable_mock( + mocker, "querybuilder.queries.algebra.clauses.AliasedColumn", column_tok + ) + + rel = qbrelations.Aliased( + subrelation, + name, + column_aliases={1: "alias"}, + ) + + expected = { + "subrelation": subrelation_tok, + "name": name_tok, + "columns": (column_tok, column_tok), + } + + result = rel._get_subtokenize_kwargs(tokenizer) + + assert expected == result + tokenizer.tokenize_name.assert_called_once_with( + schema_name=None, relation_name=name + ) + subrelation.subtokenize.assert_called_once_with(tokenizer, scoped=True) + + AC = querybuilder.queries.algebra.clauses.AliasedColumn + for c in rel.columns: + AC.assert_any_call(qbcolumns.unqualify(c)) + + def test_get_subtokenize_kwargs_without_aliases(self, mocker): + subrelation_tok = "subrelation foo" + name_tok = "bar" + + name = "foo" + + subrelation = Mock() + subrelation.subtokenize = Mock(return_value=subrelation_tok) + subrelation.columns = ( + qbcolumns.Named(int, "col_0"), + qbcolumns.Named(int, "col_1"), + ) + + tokenizer = Mock() + tokenizer.tokenize_name = Mock(return_value=name_tok) + + rel = qbrelations.Aliased( + subrelation, + name, + ) + + expected = {"subrelation": subrelation_tok, "name": name_tok, "columns": ()} + + result = rel._get_subtokenize_kwargs(tokenizer) + + assert expected == result + tokenizer.tokenize_name.assert_called_once_with( + schema_name=None, relation_name=name + ) + subrelation.subtokenize.assert_called_once_with(tokenizer, scoped=True) + class TestWith: def test_substitute(self): diff --git a/querybuilder/tests/utils/test_transaction_manager.py b/querybuilder/tests/utils/test_transaction_manager.py index ecb436c41b62acef0df207f604f249b6f5bc93b9..65328883f3b739b39464e72856d0e3d754d54af4 100644 --- a/querybuilder/tests/utils/test_transaction_manager.py +++ b/querybuilder/tests/utils/test_transaction_manager.py @@ -54,6 +54,8 @@ def mock_tokenizer(tkseq): tokenizer.attach_mock(lambda x: x, "_post") + tokenizer.transform = lambda x: x + return tokenizer