import {ascending, descending, reverse} from "d3-array";
import {FileAttachment} from "./fileAttachment.js";
-import {isArrowTable} from "./arrow.js";
+import {isArrowTable, loadArrow} from "./arrow.js";
import {DuckDBClient} from "./duckdb.js";

const nChecks = 20; // number of values to check in each array
@@ -143,43 +143,100 @@ function isTypedArray(value) {

// __query is used by table cells; __query.sql is used by SQL cells.
export const __query = Object.assign(
-  async (source, operations, invalidation) => {
-    source = await loadDataSource(await source, "table");
+  async (source, operations, invalidation, name) => {
+    source = await loadTableDataSource(await source, name);
    if (isDatabaseClient(source)) return evaluateQuery(source, makeQueryTemplate(operations, source), invalidation);
    if (isDataArray(source)) return __table(source, operations);
    if (!source) throw new Error("missing data source");
    throw new Error("invalid data source");
  },
  {
-    sql(source, invalidation) {
+    sql(source, invalidation, name) {
      return async function () {
-        return evaluateQuery(await loadDataSource(await source, "sql"), arguments, invalidation);
+        return evaluateQuery(await loadSqlDataSource(await source, name), arguments, invalidation);
      };
    }
  }
);
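// For illustration, hypothetical call sites (file and cell names are assumed,
// not part of this code):
//   __query(penguinsFile, operations, invalidation, "penguins")        // table cell
//   __query.sql(chinookFile, invalidation, "chinook")`SELECT * FROM t` // SQL cell, presumably used as a template tag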

-export async function loadDataSource(source, mode) {
-  if (source instanceof FileAttachment) {
-    if (mode === "table") {
-      switch (source.mimeType) {
-        case "text/csv": return source.csv({typed: true});
-        case "text/tab-separated-values": return source.tsv({typed: true});
-        case "application/json": return source.json();
-      }
+export async function loadDataSource(source, mode, name) {
+  switch (mode) {
+    case "table": return loadTableDataSource(source, name);
+    case "sql": return loadSqlDataSource(source, name);
+  }
+  return source;
+}
+
+// We use a weak map to cache loaded data sources by key so that we don’t have
+// to e.g. create separate SQLiteDatabaseClients every time we’re querying the
+// same SQLite file attachment. Since this is a weak map, unused references will
+// be garbage collected when they are no longer desired. Note: the name should
+// be consistent, as it is not part of the cache key!
+function sourceCache(loadSource) {
+  const cache = new WeakMap();
+  return (source, name) => {
+    if (!source) throw new Error("data source not found");
+    let promise = cache.get(source);
+    if (!promise) {
+      // Warning: do not await here! We need to populate the cache synchronously.
+      promise = loadSource(source, name);
+      cache.set(source, promise);
    }
-    if (mode === "table" || mode === "sql") {
-      switch (source.mimeType) {
-        case "application/x-sqlite3": return source.sqlite();
-      }
-      if (/\.arrow$/i.test(source.name)) return DuckDBClient.of({__table: await source.arrow({version: 9})});
+    return promise;
+  };
+}
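// Sketch of the caching above (hypothetical usage): repeated calls with the
// same source object share one in-flight promise, so the file is only loaded
// once.
//   const a = loadTableDataSource(file, "trips");
//   const b = loadTableDataSource(file, "trips");
//   a === b; // true: same cached promise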
+
+const loadTableDataSource = sourceCache(async (source, name) => {
+  if (source instanceof FileAttachment) {
+    switch (source.mimeType) {
+      case "text/csv": return source.csv({typed: true});
+      case "text/tab-separated-values": return source.tsv({typed: true});
+      case "application/json": return source.json();
+      case "application/x-sqlite3": return source.sqlite();
    }
+    if (/\.(arrow|parquet)$/i.test(source.name)) return loadDuckDBClient(source, name);
    throw new Error(`unsupported file type: ${source.mimeType}`);
  }
-  if ((mode === "table" || mode === "sql") && isArrowTable(source)) {
-    return DuckDBClient.of({__table: source});
+  if (isArrowTable(source)) return loadDuckDBClient(source, name);
+  return source;
+});
+
+const loadSqlDataSource = sourceCache(async (source, name) => {
+  if (source instanceof FileAttachment) {
+    switch (source.mimeType) {
+      case "text/csv":
+      case "text/tab-separated-values":
+      case "application/json": return loadDuckDBClient(source, name);
+      case "application/x-sqlite3": return source.sqlite();
+    }
+    if (/\.(arrow|parquet)$/i.test(source.name)) return loadDuckDBClient(source, name);
+    throw new Error(`unsupported file type: ${source.mimeType}`);
  }
+  if (isDataArray(source)) return loadDuckDBClient(await asArrowTable(source, name), name);
+  if (isArrowTable(source)) return loadDuckDBClient(source, name);
  return source;
+});
+
+async function asArrowTable(array, name) {
+  const arrow = await loadArrow();
+  return arrayIsPrimitive(array)
+    ? arrow.tableFromArrays({[name]: array})
+    : arrow.tableFromJSON(array);
+}
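// Sketch of what asArrowTable produces (example values assumed): a primitive
// array becomes a single column named after the source, while an array of
// objects keeps its keys as columns.
//   await asArrowTable([1, 2, 3], "values")      // one column "values"
//   await asArrowTable([{x: 1, y: 2}], "points") // columns "x" and "y" (name unused)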
+
+function loadDuckDBClient(
+  source,
+  name = source instanceof FileAttachment
+    ? getFileSourceName(source)
+    : "__table"
+) {
+  return DuckDBClient.of({[name]: source});
+}
+
+function getFileSourceName(file) {
+  return file.name
+    .replace(/@\d+(?=\.|$)/, "") // strip Observable file version number
+    .replace(/\.\w+$/, ""); // strip file extension
}
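// For illustration (file name assumed): when no explicit name is passed, a
// FileAttachment named "trips@2.parquet" is registered with DuckDB as table
// "trips" (getFileSourceName strips the @2 version suffix and the .parquet
// extension), while any other unnamed source falls back to the name "__table".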

async function evaluateQuery(source, args, invalidation) {
@@ -248,9 +305,9 @@ export function makeQueryTemplate(operations, source) {
    throw new Error("missing from table");
  if (select.columns && select.columns.length === 0)
    throw new Error("at least one column must be selected");
-  const columns = select.columns ? select.columns.map((c) => `t.${escaper(c)}`) : "*";
+  const columns = select.columns ? select.columns.map(escaper).join(", ") : "*";
  const args = [
-    [`SELECT ${columns} FROM ${formatTable(from.table, escaper)} t`]
+    [`SELECT ${columns} FROM ${formatTable(from.table, escaper)}`]
  ];
  for (let i = 0; i < filter.length; ++i) {
    appendSql(i ? `\nAND ` : `\nWHERE `, args);
@@ -303,8 +360,9 @@ function formatTable(table, escaper) {
    if (table.schema != null) from += escaper(table.schema) + ".";
    from += escaper(table.table);
    return from;
+  } else {
+    return escaper(table);
  }
-  return table;
}
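// Rough before/after of the generated SELECT for these changes, assuming an
// escaper that wraps identifiers in double quotes (quoting varies by source):
//   before: SELECT t."a",t."b" FROM tbl t
//   after:  SELECT "a", "b" FROM "tbl"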

function appendSql(sql, args) {
@@ -313,7 +371,7 @@ function appendSql(sql, args) {
}

function appendOrderBy({column, direction}, args, escaper) {
-  appendSql(`t.${escaper(column)} ${direction.toUpperCase()}`, args);
+  appendSql(`${escaper(column)} ${direction.toUpperCase()}`, args);
}
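// e.g. an ascending sort on a "year" column now appends `"year" ASC` rather
// than `t."year" ASC`, matching the alias-free FROM clause above (double-quote
// escaping assumed).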

function appendWhereEntry({type, operands}, args, escaper) {
@@ -398,7 +456,7 @@ function appendWhereEntry({type, operands}, args, escaper) {

function appendOperand(o, args, escaper) {
  if (o.type === "column") {
-    appendSql(`t.${escaper(o.value)}`, args);
+    appendSql(escaper(o.value), args);
  } else {
    args.push(o.value);
    args[0].push("");
@@ -421,7 +479,9 @@ function likeOperand(operand) {
}

// This function applies table cell operations to an in-memory table (array of
-// objects); it should be equivalent to the corresponding SQL query.
+// objects); it should be equivalent to the corresponding SQL query. TODO Use
+// DuckDBClient for data arrays, too, and then we wouldn’t need our own __table
+// function to do table operations on in-memory data?
export function __table(source, operations) {
  const input = source;
  let {schema, columns} = source;