[simple-example][lg] - multiple datasets per repo are now possible

This commit is contained in:
Luccas Mateus de Medeiros Gomes
2023-04-22 14:05:21 -03:00
parent 6b2b5f5e87
commit 996568c0f9
20 changed files with 577 additions and 525 deletions

View File

@@ -1,11 +0,0 @@
import axios from "axios";
/**
 * Fetch `url` with axios and resolve to the response body.
 *
 * Any URL that begins with "http" is requested through the local
 * `/api/proxy` endpoint (with the target URL-encoded in the `url` query
 * parameter) because external hosts may have CORS issues. Every other URL
 * is requested as given.
 */
export default function loadUrlProxied(url: string) {
  // HACK: duplicate of Excel code - maybe refactor
  const needsProxy = url.startsWith("http");
  const target = needsProxy
    ? `/api/proxy?url=${encodeURIComponent(url)}`
    : url;
  return axios.get(target).then((response) => response.data);
}

View File

@@ -1,105 +0,0 @@
import matter from "gray-matter";
import mdxmermaid from "mdx-mermaid";
import { h } from "hastscript";
import remarkCallouts from "@flowershow/remark-callouts";
import remarkEmbed from "@flowershow/remark-embed";
import remarkGfm from "remark-gfm";
import remarkMath from "remark-math";
import remarkSmartypants from "remark-smartypants";
import remarkToc from "remark-toc";
import remarkWikiLink from "@flowershow/remark-wiki-link";
import rehypeAutolinkHeadings from "rehype-autolink-headings";
import rehypeKatex from "rehype-katex";
import rehypeSlug from "rehype-slug";
import rehypePrismPlus from "rehype-prism-plus";
import { serialize } from "next-mdx-remote/serialize";
/**
 * Parse a markdown or MDX file to an MDX source form + front matter data.
 *
 * Front matter is split off with gray-matter, and the text before the first
 * blank line of the body ("\n\n") is kept as the excerpt. The body is then
 * compiled with next-mdx-remote's `serialize` using the remark/rehype plugins
 * listed below, with the front matter data passed as the MDX scope.
 *
 * @param {string} source the contents of a markdown or mdx file
 * @param {string} format used to indicate to next-mdx-remote which format to
 *   use (md or mdx); it is also passed as the `path` of the compiled file
 * @returns {Promise<object>} `{ mdxSource, frontMatter, excerpt }`
 */
const parse = async function (source, format) {
  const { content, data, excerpt } = matter(source, {
    excerpt: (file) => {
      // Generate an excerpt for the file: everything before the first blank line
      file.excerpt = file.content.split("\n\n")[0];
    },
  });

  const mdxSource = await serialize(
    { value: content, path: format },
    {
      // Optionally pass remark/rehype plugins
      mdxOptions: {
        remarkPlugins: [
          remarkEmbed,
          remarkGfm,
          [remarkSmartypants, { quotes: false, dashes: "oldschool" }],
          remarkMath,
          remarkCallouts,
          remarkWikiLink,
          [
            remarkToc,
            {
              heading: "Table of contents",
              tight: true,
            },
          ],
          [mdxmermaid, {}],
        ],
        rehypePlugins: [
          rehypeSlug,
          [
            rehypeAutolinkHeadings,
            {
              properties: { className: 'heading-link' },
              // Only h2-h6 get an anchor; the generated table-of-contents
              // heading and callout headings are skipped.
              // NOTE(review): hast commonly stores `className` as an array, in
              // which case the string comparison below never excludes
              // anything - confirm against remark-callouts output.
              test(element) {
                return (
                  ["h2", "h3", "h4", "h5", "h6"].includes(element.tagName) &&
                  element.properties?.id !== "table-of-contents" &&
                  element.properties?.className !== "blockquote-heading"
                );
              },
              // Icon inserted as the content of each heading link.
              content() {
                return [
                  h(
                    "svg",
                    {
                      // The SVG namespace URI; the previous value was missing
                      // the "//" after the scheme.
                      xmlns: "http://www.w3.org/2000/svg",
                      fill: "#ab2b65",
                      viewBox: "0 0 20 20",
                      className: "w-5 h-5",
                    },
                    [
                      h("path", {
                        fillRule: "evenodd",
                        clipRule: "evenodd",
                        d: "M9.493 2.853a.75.75 0 00-1.486-.205L7.545 6H4.198a.75.75 0 000 1.5h3.14l-.69 5H3.302a.75.75 0 000 1.5h3.14l-.435 3.148a.75.75 0 001.486.205L7.955 14h2.986l-.434 3.148a.75.75 0 001.486.205L12.456 14h3.346a.75.75 0 000-1.5h-3.14l.69-5h3.346a.75.75 0 000-1.5h-3.14l.435-3.147a.75.75 0 00-1.486-.205L12.045 6H9.059l.434-3.147zM8.852 7.5l-.69 5h2.986l.69-5H8.852z",
                      }),
                    ]
                  ),
                ];
              },
            },
          ],
          [rehypeKatex, { output: "mathml" }],
          [rehypePrismPlus, { ignoreMissing: true }],
        ],
        format,
      },
      scope: data,
    }
  );

  return {
    mdxSource: mdxSource,
    frontMatter: data,
    excerpt,
  };
};
export default parse;

View File

@@ -0,0 +1,147 @@
import { Octokit } from 'octokit';
/**
 * Locates a project inside a GitHub repository: which repository and branch
 * to read, and which paths hold its README and its files.
 */
export interface GithubProject {
/** Repository owner, passed as `owner` to the GitHub API calls. */
owner: string;
/** Repository name, passed as `repo` to the GitHub API calls. */
repo: string;
/** Branch to read from; passed as the git ref when fetching content. */
branch: string;
/** Repository paths of the files whose download URL, name and size are looked up. */
files: string[];
/**
 * Repository path of the README. Its parent directory is used as the
 * project's base path ("/" when the path contains no slash).
 */
readme: string;
/** Not read by the functions in this file - presumably display text; confirm with callers. */
description?: string;
/** Not read by the functions in this file - presumably a display name; confirm with callers. */
name?: string;
}
/**
 * Fetch a README from a GitHub repository and return it as decoded text.
 *
 * @param owner repository owner
 * @param repo repository name
 * @param branch git ref the file is read from
 * @param readme repository path of the README file
 * @param github_pat optional token used to authenticate the request
 * @returns the base64-decoded file content, or `null` when the response has
 *   no content or the request fails (the error is logged)
 */
export async function getProjectReadme(
  owner: string,
  repo: string,
  branch: string,
  readme: string,
  github_pat?: string
) {
  const client = new Octokit({ auth: github_pat });
  try {
    const { data } = await client.rest.repos.getContent({
      owner,
      repo,
      path: readme,
      ref: branch,
    });
    const encoded = (data as { content?: string }).content;
    // Missing and empty content are both reported as "no README".
    if (!encoded) {
      return null;
    }
    return Buffer.from(encoded, 'base64').toString();
  } catch (error) {
    console.log(error);
    return null;
  }
}
/**
 * Look up the committer date of the most recent commit that touched a path.
 *
 * @param owner repository owner
 * @param repo repository name
 * @param branch branch whose history is searched
 * @param readme repository path used to filter the commits (despite the
 *   name, any file or directory path can be given)
 * @param github_pat optional token used to authenticate the request
 * @returns the committer date of the newest matching commit, or `null` when
 *   there is no such commit or the request fails (the error is logged)
 */
export async function getLastUpdated(
  owner: string,
  repo: string,
  branch: string,
  readme: string,
  github_pat?: string
) {
  const octokit = new Octokit({ auth: github_pat });
  try {
    const response = await octokit.rest.repos.listCommits({
      owner,
      repo,
      path: readme,
      // "List commits" selects the branch through `sha`; it has no `ref`
      // parameter, so passing `ref` silently queried the default branch.
      sha: branch,
      // Only the newest commit is needed.
      per_page: 1,
    });
    // An empty history for the path, or a commit without committer
    // information, yields null instead of throwing.
    return response.data[0]?.commit.committer?.date ?? null;
  } catch (error) {
    console.log(error);
    return null;
  }
}
/**
 * Fetch the repository record for `owner/repo` from the GitHub API.
 *
 * @param owner repository owner
 * @param repo repository name
 * @param github_pat optional token used to authenticate the request
 * @returns the response payload, or `null` when the request fails (the
 *   error is logged)
 */
export async function getProjectMetadata(
  owner: string,
  repo: string,
  github_pat?: string
) {
  const client = new Octokit({ auth: github_pat });
  try {
    const { data: repository } = await client.rest.repos.get({ owner, repo });
    return repository;
  } catch (error) {
    console.log(error);
    return null;
  }
}
/**
 * Collect the download URL, name and size of each listed repository path.
 *
 * Paths are requested one after another, in the order given.
 *
 * @param owner repository owner
 * @param repo repository name
 * @param branch git ref the paths are read from
 * @param files repository paths to look up
 * @param github_pat optional token used to authenticate the requests
 * @returns one `{ download_url, name, size }` entry per path, or `null` as
 *   soon as any request fails (the error is logged)
 */
export async function getRepoContents(
  owner: string,
  repo: string,
  branch: string,
  files: string[],
  github_pat?: string
) {
  const client = new Octokit({ auth: github_pat });
  try {
    const entries = [];
    for (const filePath of files) {
      const { data } = await client.rest.repos.getContent({
        owner,
        repo,
        ref: branch,
        path: filePath,
      });
      const { download_url, name, size } = data as {
        download_url?: string;
        name: string;
        size: number;
      };
      entries.push({ download_url, name, size });
    }
    return entries;
  } catch (error) {
    console.log(error);
    return null;
  }
}
/**
 * Assemble everything known about a project: repository metadata, README
 * text, file listing, base path and last-updated date.
 *
 * The lookups run in sequence and the function gives up with `null` as soon
 * as the metadata, the README or the file listing is unavailable. A missing
 * last-updated date does not abort; it is returned as-is.
 *
 * @param project location of the project inside a GitHub repository
 * @param github_pat optional token used to authenticate the requests
 * @returns the repository metadata extended with `files`, `readmeContent`,
 *   `last_updated` and `base_path`, or `null`
 */
export async function getProject(project: GithubProject, github_pat?: string) {
  const { owner, repo, branch, readme, files } = project;

  const metadata = await getProjectMetadata(owner, repo, github_pat);
  if (!metadata) {
    return null;
  }

  const readmeContent = await getProjectReadme(
    owner,
    repo,
    branch,
    readme,
    github_pat
  );
  if (!readmeContent) {
    return null;
  }

  const fileEntries = await getRepoContents(
    owner,
    repo,
    branch,
    files,
    github_pat
  );
  if (!fileEntries) {
    return null;
  }

  // The base path is the README's directory: everything before the last
  // slash, or "/" when the README path has no slash at all.
  const lastSlash = readme.lastIndexOf('/');
  const base_path = lastSlash === -1 ? '/' : readme.slice(0, lastSlash);

  const last_updated = await getLastUpdated(
    owner,
    repo,
    branch,
    base_path,
    github_pat
  );

  return {
    ...metadata,
    files: fileEntries,
    readmeContent,
    last_updated,
    base_path,
  };
}

View File

@@ -1,16 +0,0 @@
import papa from "papaparse";
/**
 * Parse CSV text with PapaParse (`header: true`) into rows plus column
 * descriptors.
 *
 * Surrounding whitespace is trimmed from the input before parsing.
 *
 * @param {string} csv raw CSV text
 * @returns {{ rows: Array, fields: Array<{ key: string, name: string }> }}
 *   the parsed rows and one `{ key, name }` descriptor per reported field,
 *   both set to the field name
 */
const parseCsv = (csv) => {
  const parsed = papa.parse(csv.trim(), { header: true });
  const fields = parsed.meta.fields.map((column) => ({
    key: column,
    name: column,
  }));
  return { rows: parsed.data, fields };
};
export default parseCsv;

View File

@@ -1,60 +0,0 @@
import * as crypto from "crypto";
import axios from "axios";
import { Octokit } from "octokit"
/**
 * A project backed by a GitHub repository, identified by owner and name.
 *
 * Construction only derives the repository URL and an id from it; the
 * README, `datapackage.json` and repository metadata are loaded separately
 * by `initFromGitHub` (or in one step through `Project.getFromGitHub`).
 */
export default class Project {
  id: string;
  name: string;
  owner: string;
  github_repo: string;
  readme: string;
  metadata: any;
  repo_metadata: any;

  constructor(owner: string, name: string) {
    this.name = name;
    this.owner = owner;
    this.github_repo = `https://github.com/${owner}/${name}`;
    // TODO: using the GitHub repo to set the id is not a good idea
    // since repos can be renamed and then we are going to end up with
    // a duplicate
    // The id is the hex SHA-1 digest of the repository URL.
    this.id = crypto.createHash("sha1").update(this.github_repo).digest("hex");
  }

  /**
   * Load README.md, datapackage.json and the repository metadata.
   *
   * A README that cannot be fetched becomes `null`, and a datapackage that
   * cannot be fetched becomes `{}`. A failing metadata request is not
   * handled here and rejects the returned promise.
   */
  initFromGitHub = async () => {
    const octokit = new Octokit();
    // TODO: what if the repo doesn't exist?
    try {
      this.readme = await this.getFileContent("README.md");
    } catch {
      this.readme = null;
    }
    try {
      this.metadata = await this.getFileContent("datapackage.json");
    } catch {
      this.metadata = {};
    }
    const { data } = await octokit.rest.repos.get({
      owner: this.owner,
      repo: this.name,
    });
    this.repo_metadata = data || null;
  };

  /**
   * Fetch a file of this repository from raw.githubusercontent.com.
   *
   * @param path file path inside the repository
   * @param branch branch to read from (defaults to "main")
   * @returns a promise for the response body
   */
  getFileContent = (path, branch = "main") => {
    const rawUrl = `https://raw.githubusercontent.com/${this.owner}/${this.name}/${branch}/${path}`;
    return axios.get(rawUrl).then((res) => res.data);
  };

  /** Return a plain-data copy of this project via a JSON round trip. */
  serialize() {
    const json = JSON.stringify(this);
    return JSON.parse(json);
  }

  /** Create a project for `owner/name` and load its data from GitHub. */
  static async getFromGitHub(owner: string, name: string) {
    const instance = new Project(owner, name);
    await instance.initFromGitHub();
    return instance;
  }
}

View File

@@ -1,47 +0,0 @@
/**
 * Build a Vega-Lite spec from a "simple" view definition.
 *
 * The x axis uses the resource field named by `view.spec.group` and the y
 * axis the field named by the first entry of `view.spec.series`; further
 * series entries are not used. Each axis type is derived from the field's
 * declared type.
 *
 * @param view view definition providing `title` and `spec` (`type`, `group`, `series`)
 * @param resource resource whose `schema.fields` describe the data columns
 * @returns the Vega-Lite spec object
 */
export function convertSimpleToVegaLite(view, resource) {
  const { fields } = resource.schema;
  const groupField = fields.find((field) => field.name === view.spec.group);
  const seriesField = fields.find(
    (field) => field.name === view.spec.series[0]
  );

  const groupType = inferVegaType(groupField.type);
  const seriesType = inferVegaType(seriesField.type);

  return {
    $schema: "https://vega.github.io/schema/vega-lite/v5.json",
    mark: {
      type: view.spec.type,
      color: "black",
      strokeWidth: 1,
      tooltip: true,
    },
    title: view.title,
    width: 500,
    height: 300,
    // Interval selection bound to the scales.
    selection: {
      grid: {
        type: "interval",
        bind: "scales",
      },
    },
    encoding: {
      x: {
        field: groupField.name,
        type: groupType,
      },
      y: {
        field: seriesField.name,
        type: seriesType,
      },
    },
  };
}
/**
 * Map a schema field type to a Vega-Lite encoding type.
 *
 * Vega-Lite documents its encoding types in lowercase, so lowercase names
 * are returned. Date-like fields become "temporal", numeric fields become
 * "quantitative", and every other (or missing) field type falls back to
 * "nominal" instead of leaving the encoding type undefined.
 *
 * @param {string} fieldType the field's declared type
 * @returns {"temporal" | "quantitative" | "nominal"} the Vega-Lite type
 */
const inferVegaType = (fieldType) => {
  switch (fieldType) {
    case "date":
    case "datetime":
      return "temporal";
    case "number":
    case "integer":
      return "quantitative";
    default:
      return "nominal";
  }
};