Last active
October 17, 2023 08:41
-
-
Save navidshad/973e9c594a63838d1ebb8f2c2495cf87 to your computer and use it in GitHub Desktop.
Read all data from Firestore by a cursor and resolve the Bandwidth Exhausted error
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
How can we read all data from a Firestore collection?
Problem:
When you read a huge collection document by document from Firebase, you run into these problems:
1. The Firestore cursor is not powerful enough to get this task done on its own.
2. After a few thousand requests you will get a Bandwidth Exhausted error.
Solution:
You need two things to resolve the problems above:
1. A powerful cursor
2. A child-process manager
Why a cursor?
The default Firebase cursor can't read all data automatically; it needs you to tell it to go to the next part, and the next part, and so on.
So we need a cursor of our own that tells the Firebase cursor to go to the next part automatically.
Why a child process?
After reading thousands of docs, the Bandwidth Exhausted error will happen.
I figured out that if we stop the running process and start a new one, we can get past the error and continue the job.
So we need a child-process manager to do that for us.
To implement these two things we need two scripts: parent (index.js) and child (cursor.js).
The parent is the child-process manager. The child is the cursor. They can send messages to each other and collaborate.
This is the scenario:
- The parent gets the collection details from the developer.
- Then it runs a cursor as a child process and provides it with some basic info about the collection.
- Then the child goes through the collection and reads documents until it gets the [8 RESOURCE_EXHAUSTED] error, then sends a message to the parent to inform it of the error.
- Then the parent kills the child, creates a new one and tells it where to start reading again.
*/
// | |
// Parent Script ============================= | |
// =========================================== | |
// you can call it index.js | |
// | |
const childProcess = require('child_process'); | |
const path = require('path'); | |
/**
 * Reads a whole collection by forking `cursor.js` (next to this file) as a
 * child process and restarting it every time it reports an error, resuming
 * from the last document id the child reported.
 *
 * Child messages understood here:
 *   { type: 'DOC', doc }       -> `onDoc(doc, index)` is invoked
 *   { type: 'DONE' }           -> reading is finished
 *   { type: 'ERROR', lastId }  -> the child is killed and a new one is forked
 *
 * NOTE(review): there is no upper bound on restarts; a child that reports
 * ERROR on every run will be re-forked indefinitely.
 *
 * @param {object} options
 * @param {string} options.collection - Collection name, passed to the child through its environment.
 * @param {string} options.orderBy - Key the documents are sorted by, passed to the child through its environment.
 * @param {number} [options.limit=1000] - Documents per request, passed to the child through its environment.
 * @param {(doc: object, index: number) => (void|Promise<void>)} [options.onDoc] - Called per document
 *   with a zero-based running index; it is not awaited before the next message is handled.
 * @returns {Promise<void>} Resolves once a child reports DONE. Rejects when the child
 *   cannot be started, exits without reporting DONE/ERROR, or when `onDoc` fails.
 */
async function readAllDocs({
  // collection name
  collection,
  // you should provide a key to sort all docs by it
  orderBy,
  // total documents on each request
  limit = 1000,
  // it's a callback being called per document
  onDoc,
}) {
  let lastId = null;
  let counter = 0;

  // Forks one child and resolves with `true` when another child has to be
  // started afterwards (ERROR) or `false` when the collection is finished (DONE).
  const runChild = () => {
    console.log('run cursor for', collection);
    return new Promise((resolve, reject) => {
      const env = { ...process.env, collection, limit, orderBy };
      // Environment values are always strings: a literal `null` would reach the
      // child as the truthy string "null", so the key is only set when there
      // really is a document to resume from.
      if (lastId != null) {
        env.lastId = lastId;
      } else {
        delete env.lastId;
      }

      const child = childProcess.fork(path.join(__dirname, 'cursor.js'), { env });

      // Only the first outcome of a child counts; anything it emits afterwards
      // (late documents, repeated errors, the exit caused by kill) is ignored.
      let settled = false;
      const finish = (shouldContinue) => {
        if (settled) return;
        settled = true;
        child.kill();
        resolve(shouldContinue);
      };
      const fail = (error) => {
        if (settled) return;
        settled = true;
        child.kill();
        reject(error);
      };

      child.on('message', (data) => {
        if (settled || !data) return;

        if (data.type === 'DOC') {
          const index = counter;
          counter += 1;
          if (!onDoc) return;
          try {
            const pending = onDoc(data.doc, index);
            // Surface failures of an async callback instead of leaving them unhandled.
            if (pending && typeof pending.then === 'function') {
              pending.then(undefined, fail);
            }
          } catch (error) {
            fail(error);
          }
        } else if (data.type === 'DONE') {
          finish(false);
        } else if (data.type === 'ERROR') {
          // Keep the previous resume point when the child could not report one.
          if (data.lastId != null) lastId = data.lastId;
          finish(true);
        }
      });

      // Without these two listeners a child that cannot be spawned, or that
      // dies without sending a message, would leave this promise pending forever.
      child.on('error', fail);
      child.on('exit', (code, signal) => {
        fail(new Error(
          `cursor.js exited before reporting DONE or ERROR (code: ${code}, signal: ${signal})`,
        ));
      });
    });
  };

  // start a new child process for as long as the previous one asked for it
  let allowContinue = true;
  while (allowContinue) {
    allowContinue = await runChild();
  }
}
// Now you can start to read a collection.
// The returned promise is handled explicitly so that a failure is logged and
// reflected in the exit code instead of surfacing as an unhandled rejection.
readAllDocs({
  collection: 'users',
  orderBy: 'uid',
  onDoc: (doc, index) => {
    // do something with the current doc
  },
}).catch((error) => {
  console.error('reading the collection failed:', error);
  process.exitCode = 1;
});
// | |
// Child Script ============================== | |
// =========================================== | |
// you can call it cursor.js | |
// | |
// Firebase Admin SDK; the cursor below reaches Firestore through admin.firestore().
const admin = require("firebase-admin");
// Initialise the default app with an empty options object.
// NOTE(review): presumably credentials and project are then resolved from the
// environment of the child process — confirm for your deployment.
admin.initializeApp({});
/**
 * Walks a collection page by page, ordered by `process.env.orderBy`, and hands
 * every document to `onDoc`. When `process.env.lastId` names an existing
 * document, reading starts after that document; otherwise it starts at the
 * beginning of the collection.
 *
 * On any failure the error is logged, the parent is informed once with
 * `{ type: 'ERROR', lastId, error }` (where `lastId` is the id of the last
 * document handed to `onDoc`, or the resume id this process was started with,
 * or `null`) and reading stops without calling `onDone`.
 *
 * NOTE(review): if the document named by `lastId` no longer exists, reading
 * restarts from the first document, so earlier documents are delivered again.
 *
 * @param {object} options
 * @param {string} options.collection - Collection name.
 * @param {number} [options.limit=1000] - Documents per query; anything that is not a
 *   positive integer (e.g. NaN from a missing env variable) falls back to 1000.
 * @param {(doc: object) => void} [options.onDoc] - Called with every document snapshot.
 * @param {() => void} [options.onDone] - Called once after the last page was read.
 * @returns {Promise<void>}
 */
async function runCursor({
  // collection name
  collection,
  // total documents on each call
  limit = 1000,
  // on document read
  onDoc,
  onDone,
}) {
  const orderBy = process.env.orderBy;

  // Environment variables are always strings, so a parent that passed
  // `null`/`undefined` shows up here as the text "null"/"undefined".
  const rawLastId = process.env.lastId;
  const lastId = rawLastId && rawLastId !== 'null' && rawLastId !== 'undefined'
    ? rawLastId
    : null;

  const pageSize = Number.isInteger(limit) && limit > 0 ? limit : 1000;
  const collectionRef = admin.firestore().collection(collection);

  // Snapshot of the last document that was handed to onDoc (or the resume point).
  let lastDoc;

  try {
    if (lastId) {
      // Get last document from last killed process
      const resumeSnapshot = await collectionRef.doc(lastId).get();
      if (resumeSnapshot.exists) lastDoc = resumeSnapshot;
    }

    for (;;) {
      let query = collectionRef.orderBy(orderBy).limit(pageSize);
      if (lastDoc) {
        // define where to start to read
        query = query.startAfter(lastDoc);
      }

      const page = await query.get();
      // stop cursor if there are no more docs
      if (page.docs.length === 0) break;

      for (const doc of page.docs) {
        if (onDoc) onDoc(doc);
        // Advance only after the document was delivered, so a resume point
        // never skips a document and never repeats a delivered one.
        lastDoc = doc;
      }
    }
  } catch (error) {
    console.log(error);
    // Inform parent process about this error; `lastDoc` may still be unset when
    // the very first request failed, so fall back to the id we were started with.
    process.send({ type: 'ERROR', lastId: lastDoc ? lastDoc.id : lastId, error });
    // Stop here: retrying in this process would only repeat the failing request.
    return;
  }

  if (onDone) onDone();
}
// Entry point of the child script: read the settings from the environment and
// forward every document / the end of the collection to the parent process.
// A missing or non-numeric `limit` is passed as undefined so that runCursor's
// own default applies instead of NaN.
const requestedLimit = Number.parseInt(process.env.limit, 10);
runCursor({
  collection: process.env.collection,
  limit: Number.isNaN(requestedLimit) ? undefined : requestedLimit,
  onDoc: (doc) => {
    process.send({ type: 'DOC', doc: doc.data() });
  },
  onDone: () => {
    process.send({ type: 'DONE' });
  },
}).catch((error) => {
  // Anything that escapes runCursor is fatal for this child: log it and exit
  // with a failure code rather than leaving an unhandled rejection behind.
  console.error(error);
  process.exit(1);
});
/*
That's it.
Use these scripts to read all documents from a Firestore collection.
*/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment