Last active
October 10, 2019 21:44
-
-
Save nlw0/0549c9fafa5e6c259e47a73e913c1185 to your computer and use it in GitHub Desktop.
Counting files based on libuv's readdir and scandir via C and Julia
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "uv.h"
#include "stdio.h"
#include "string.h"
#include <sys/time.h>
#include <time.h>
/* Return the smaller of two ints; when they compare equal, b is returned. */
int min(int a, int b) {
    return (a < b) ? a : b;
}
int get_file_count_read(const char path[], int chunklen) { | |
uv_fs_t readdir_req; | |
uv_fs_opendir(NULL, &readdir_req, path, NULL); | |
uv_dirent_t dirents[1024]; | |
uv_dir_t* rdir = readdir_req.ptr; | |
rdir->dirents = dirents; | |
rdir->nentries = min(1024,chunklen); | |
int acc = 0; | |
char fullname[1024]; | |
strcpy(fullname, path); | |
char * filebase = fullname+strlen(path)+1; | |
*(filebase-1)='/'; | |
for(;;) { | |
int r = uv_fs_readdir(NULL, &readdir_req, readdir_req.ptr, NULL); | |
if (r <= 0) | |
break; | |
for (int i=0; i<min(chunklen, r); i++) { | |
if (dirents[i].type == UV_DIRENT_DIR) { | |
strcpy(filebase, dirents[i].name); | |
acc += get_file_count_read(fullname, chunklen); | |
} else if (dirents[i].type == UV_DIRENT_FILE) { | |
acc += 1; | |
} | |
} | |
} | |
uv_fs_closedir(NULL, &readdir_req, readdir_req.ptr, NULL); | |
return acc; | |
} | |
int get_file_count_scan(const char path[], int chunklen) { | |
uv_fs_t readdir_req; | |
uv_fs_scandir(NULL, &readdir_req, path, 0, NULL); | |
uv_dirent_t dirent; | |
int acc = 0; | |
char fullname[1024]; | |
strcpy(fullname, path); | |
char * filebase = fullname+strlen(path)+1; | |
*(filebase-1)='/'; | |
for(;;) { | |
int r = uv_fs_scandir_next(&readdir_req, &dirent); | |
if (r == UV_EOF) | |
break; | |
if (dirent.type == UV_DIRENT_DIR) { | |
strcpy(filebase, dirent.name); | |
acc += get_file_count_scan(fullname, chunklen); | |
} else if (dirent.type == UV_DIRENT_FILE) { | |
acc += 1; | |
} | |
} | |
uv_fs_req_cleanup(&readdir_req); | |
return acc; | |
} | |
/*
 * Benchmark driver: walks the directory given as argv[1] three times with each
 * counting function and prints the mean time per walk, in seconds, as measured
 * by clock().
 *
 * Returns 0 on success, 1 when no directory argument was supplied.
 */
int main(int argc, char** argv) {
    const int niter = 3;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <directory>\n", argc > 0 ? argv[0] : "filecount");
        return 1;
    }
    const char* root = argv[1];

    clock_t start = clock();
    for (int i = 0; i < niter; i++) {
        /* get_file_count_read caps the chunk length at 1024 entries. */
        get_file_count_read(root, 11111);
    }
    double t1 = (double)(clock() - start) / CLOCKS_PER_SEC;
    printf("readdir file count %f\n", t1 / niter);

    /* Restart the timer so the scandir figure excludes the readdir phase. */
    start = clock();
    for (int i = 0; i < niter; i++) {
        get_file_count_scan(root, 22);
    }
    double t2 = (double)(clock() - start) / CLOCKS_PER_SEC;
    printf("scandir file count %f\n", t2 / niter);

    return 0;
}
// Recorded results: mean seconds per walk; the percentage is readdir time / scandir time.
// 2560000 files - 36%
// readdir file count 1.249874
// scandir file count 3.428333
// 640000 files - 43%
// readdir file count 0.321972
// scandir file count 0.748858
// 800*800 files - 86%
// readdir file count 0.330214
// scandir file count 0.383865
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using MD5 | |
using BenchmarkTools | |
"""
    generate_tree(treeroot, N, silly=false)

Create a tree of small random text files under `treeroot`.

With `silly == false`, `N` subdirectories holding `N` files each are created. With
`silly == true`, a single subdirectory holding `N * N` files is created. Subdirectory
and file names are hex MD5 digests of the loop indices; files get a `.png` suffix and
contain 8 to 16 random lowercase letters.

`treeroot` and any missing parent directories are created as needed; directories that
already exist are reused and files in them are overwritten. Returns `nothing`.
"""
function generate_tree(treeroot, N, silly=false)
    # mkpath rather than mkdir: tolerate a missing parent and an existing tree.
    mkpath(treeroot)
    ndirs = silly ? 1 : N
    nfiles = silly ? N * N : N
    for j in 1:ndirs
        subdir = joinpath(treeroot, bytes2hex(md5("$j")))
        mkpath(subdir)
        for k in 1:nfiles
            filename = joinpath(subdir, bytes2hex(md5("$j$k")) * ".png")
            open(filename, "w") do io
                write(io, join(rand('a':'z', rand(8:16))))
            end
        end
    end
    return nothing
end
"""
    get_count_readdir(dir)

Return the number of entries directly inside `dir`, as listed by `readdir`.

The count is not recursive: a subdirectory counts as a single entry and is not
descended into.
"""
function get_count_readdir(dir)
    return length(readdir(dir))
end
"""
    get_count_lazyreaddir(dir)

Return the number of entries produced by iterating `lazyreaddir(dir)`.

The count is not recursive: every entry yielded counts as one, whatever its type.

NOTE(review): `lazyreaddir` is not defined in this file; it is presumably provided by
the Julia build under test. Confirm before running.
"""
function get_count_lazyreaddir(dir)
    # count with an always-true predicate consumes the iterator one entry at a time.
    return count(_ -> true, lazyreaddir(dir))
end
# Root under which `generate_tree` builds the benchmark tree.
genroot = "/tmp/testdir"
# Directory whose entries are counted. In "silly" mode generate_tree creates a single
# subdirectory named after the MD5 digest of "1", which is this path.
treeroot = "/tmp/testdir/c4ca4238a0b923820dcc509a6f75849b"
# Build the tree only when it is not already present, so the script can be re-run.
isdir(treeroot) || generate_tree(genroot, 1600, true)
# Interpolate the global with `\$` so @btime does not measure untyped-global access.
total = @btime get_count_lazyreaddir($treeroot)
total = @btime get_count_readdir($treeroot)
# Recorded @btime output, presumably in call order (lazyreaddir first, then readdir):
# 6.215 s (10240035 allocations: 273.44 MiB)
# 3.873 s (2560024 allocations: 189.25 MiB)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment