[ins] In [1]: import ibis
[ins] In [2]: from ibis import _
[ins] In [3]: con = ibis.duckdb.connect("/home/gil/data/imdb.ddb")
[ins] In [4]: ratings = con.tables.ratings
[ins] In [5]: basics = con.tables.basics
[ins] In [6]: ratings = ratings.select(
...: ratings.tconst,
...: avg_rating=ratings.averageRating.cast("float"),
...: num_votes=ratings.numVotes.cast("int"),
...: )
[ins] In [7]: basics = basics.filter([basics.titleType == "movie", basics.isAdult == "0"]).select(
...: "tconst", "primaryTitle", "startYear"
...: )
[ins] In [8]: topfilms = (
...: ratings.join(basics, "tconst")
...: .order_by([_.avg_rating.desc(), _.num_votes.desc()])
...: .filter(_.num_votes > 100_000)
...: )
[ins] In [9]: topfilms.execute(limit=10)
Out[9]:
tconst avg_rating num_votes primaryTitle startYear
0 tt0111161 9.3 2651547 The Shawshank Redemption 1994
1 tt0068646 9.2 1838044 The Godfather 1972
2 tt0468569 9.0 2623735 The Dark Knight 2008
3 tt0167260 9.0 1827464 The Lord of the Rings: The Return of the King 2003
4 tt0108052 9.0 1343647 Schindler's List 1993
5 tt0071562 9.0 1259465 The Godfather Part II 1974
6 tt0050083 9.0 782903 12 Angry Men 1957
7 tt0110912 8.9 2029684 Pulp Fiction 1994
8 tt15097216 8.9 199495 Jai Bhim 2021
9 tt1375666 8.8 2325417 Inception 2010
[ins] In [10]: con2 = ibis.postgres.connect(port=5438, user="postgres", password="postgres")
[ins] In [11]: con2.execute(topfilms, limit=10)
Out[11]:
tconst avg_rating num_votes primaryTitle startYear
0 tt0111161 9.3 2651547 The Shawshank Redemption 1994
1 tt0068646 9.2 1838044 The Godfather 1972
2 tt0468569 9.0 2623735 The Dark Knight 2008
3 tt0167260 9.0 1827464 The Lord of the Rings: The Return of the King 2003
4 tt0108052 9.0 1343647 Schindler's List 1993
5 tt0071562 9.0 1259465 The Godfather Part II 1974
6 tt0050083 9.0 782903 12 Angry Men 1957
7 tt0110912 8.9 2029684 Pulp Fiction 1994
8 tt15097216 8.9 199495 Jai Bhim 2021
9 tt1375666 8.8 2325417 Inception 2010
[ins] In [12]: con3 = ibis.sqlite.connect("/home/gil/data/databog/imdb.db")
[ins] In [13]: con3.execute(topfilms, limit=10)
Out[13]:
tconst avg_rating num_votes primaryTitle startYear
0 tt0111161 9.3 2651547 The Shawshank Redemption 1994
1 tt0068646 9.2 1838044 The Godfather 1972
2 tt0468569 9.0 2623735 The Dark Knight 2008
3 tt0167260 9.0 1827464 The Lord of the Rings: The Return of the King 2003
4 tt0108052 9.0 1343647 Schindler's List 1993
5 tt0071562 9.0 1259465 The Godfather Part II 1974
6 tt0050083 9.0 782903 12 Angry Men 1957
7 tt0110912 8.9 2029684 Pulp Fiction 1994
8 tt15097216 8.9 199495 Jai Bhim 2021
9 tt1375666 8.8 2325417 Inception 2010
Created
February 3, 2023 21:52
-
-
Save gforsyth/5a05f6a6ec7acd4698c0cadbbbf08b81 to your computer and use it in GitHub Desktop.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment