Implemented case-insensitive search for non-ASCII characters

This commit is contained in:
pagedmov 2025-09-10 17:57:47 -04:00
parent 52438bcc0d
commit 2b0404aed5

View file

@ -58,7 +58,7 @@ class U2idx(object):
self.active_id = ""
self.active_cur: Optional["sqlite3.Cursor"] = None
self.cur: dict[str, "sqlite3.Cursor"] = {}
self.mem_cur = sqlite3.connect(":memory:", check_same_thread=False).cursor()
self.mem_cur = self._connect_db(":memory:", check_same_thread=False).cursor()
self.mem_cur.execute(r"create table a (b text)")
self.sh_cur: Optional["sqlite3.Cursor"] = None
@ -69,6 +69,15 @@ class U2idx(object):
def log(self, msg: str, c: Union[int, str] = 0) -> None:
    """Forward msg to self.log_func under the "u2idx" source name.

    c is handed to self.log_func unchanged as its third argument
    (defaults to 0).
    """
    emit = self.log_func
    emit("u2idx", msg, c)
def _connect_db(self, *args, **kwargs):
    """Open a sqlite3 connection and register the SQL helpers used by search.

    All positional and keyword arguments are forwarded unchanged to
    sqlite3.connect(). The returned connection additionally exposes a
    one-argument SQL function named ``casefold`` which applies python's
    str.casefold() to its argument, so queries can do case-insensitive
    matching on text.

    Returns the new sqlite3 connection.
    """
    assert sqlite3  # type: ignore  # !rm
    db = sqlite3.connect(*args, **kwargs)

    def casefold(val):
        # only text can be casefolded; NULL, numbers and blobs are returned
        # as-is so that a single non-text value in a column cannot raise
        # inside the callback and abort the whole query
        try:
            return val.casefold()
        except AttributeError:
            return val

    # leverage python's casefold() string method for search terms
    db.create_function("casefold", 1, casefold)
    return db
def shutdown(self) -> None:
if not HAVE_SQLITE3:
return
@ -117,7 +126,7 @@ class U2idx(object):
assert sqlite3 # type: ignore # !rm
db = sqlite3.connect(self.args.shr_db, timeout=2, check_same_thread=False)
db = self._connect_db(self.args.shr_db, timeout=2, check_same_thread=False)
cur = db.cursor()
cur.execute('pragma table_info("sh")').fetchall()
self.sh_cur = cur
@ -148,7 +157,7 @@ class U2idx(object):
uri = ""
try:
uri = "{}?mode=ro&nolock=1".format(Path(db_path).as_uri())
db = sqlite3.connect(uri, timeout=2, uri=True, check_same_thread=False)
db = self._connect_db(uri, timeout=2, uri=True, check_same_thread=False)
cur = db.cursor()
cur.execute('pragma table_info("up")').fetchone()
self.log("ro: %r" % (db_path,))
@ -160,7 +169,7 @@ class U2idx(object):
if not cur:
# on windows, this steals the write-lock from up2k.deferred_init --
# seen on win 10.0.17763.2686, py 3.10.4, sqlite 3.37.2
cur = sqlite3.connect(db_path, timeout=2, check_same_thread=False).cursor()
cur = self._connect_db(db_path, timeout=2, check_same_thread=False).cursor()
self.log("opened %r" % (db_path,))
self.cur[ptop] = cur
@ -234,7 +243,7 @@ class U2idx(object):
va.append("\nrd")
elif v == "name":
v = "up.fn"
v = "casefold(up.fn)"
elif v == "tags" or ptn_mt.match(v):
have_mt = True
@ -285,6 +294,10 @@ class U2idx(object):
tail = "||'%'"
v = v[:-1]
if "casefold(up.fn)" in q:
# casefold the search term as well
v = unicode(v).casefold()
q += " {}?{} ".format(head, tail)
va.append(v)
is_key = True
@ -300,14 +313,14 @@ class U2idx(object):
continue
va.pop()
va.append(zs.lower())
va.append(zs.casefold())
q = q[: m.start()]
field, oper = m.groups()
if oper in ["=", "=="]:
q += " {} like ? ) ".format(field)
else:
q += " lower({}) {} ? ) ".format(field, oper)
q += " casefold({}) {} ? ) ".format(field, oper)
try:
return self.run_query(uname, vols, q, va, have_mt, True, lim)