/*---------------------------------------------------------------------------------------------
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { nbformat } from '@jupyterlab/coreutils';
import { NotebookCell, NotebookCellData, NotebookCellKind, NotebookCellOutput } from 'vscode';
import { CellMetadata, CellOutputMetadata } from './common';
import { textMimeTypes } from './deserializers';

const textDecoder = new TextDecoder();

/** Mime types VS Code uses for the built-in error and stream output items. */
enum CellOutputMimeTypes {
	error = 'application/vnd.code.notebook.error',
	stderr = 'application/vnd.code.notebook.stderr',
	stdout = 'application/vnd.code.notebook.stdout'
}

/** Any of the Jupyter output shapes this module can produce. */
type JupyterOutput =
	| nbformat.IUnrecognizedOutput
	| nbformat.IExecuteResult
	| nbformat.IDisplayData
	| nbformat.IStream
	| nbformat.IError;

/**
 * Converts a VS Code notebook cell into its Jupyter (nbformat) representation.
 *
 * Markup cells become markdown cells, cells whose language id is `raw` become
 * raw cells, and everything else becomes a code cell.
 *
 * @param vscCell The VS Code cell to convert.
 * @returns The equivalent Jupyter cell.
 */
export function createJupyterCellFromNotebookCell(
	vscCell: NotebookCellData
): nbformat.IRawCell | nbformat.IMarkdownCell | nbformat.ICodeCell {
	let cell: nbformat.IRawCell | nbformat.IMarkdownCell | nbformat.ICodeCell;
	if (vscCell.kind === NotebookCellKind.Markup) {
		cell = createMarkdownCellFromNotebookCell(vscCell);
	} else if (vscCell.languageId === 'raw') {
		cell = createRawCellFromNotebookCell(vscCell);
	} else {
		cell = createCodeCellFromNotebookCell(vscCell);
	}
	return cell;
}

/**
 * Sort the JSON to minimize unnecessary SCM changes.
 * Jupyter notebooks/labs sort the JSON keys in alphabetical order.
 * https://github.com/microsoft/vscode-python/issues/13155
 *
 * Arrays keep their element order (each element is processed recursively).
 * Non-empty objects are rebuilt with their keys in default `Array.prototype.sort`
 * order. Every other value, including empty objects, is returned unchanged.
 *
 * @param obj Any JSON-like value.
 * @returns A value with the same content whose object keys are sorted.
 */
export function sortObjectPropertiesRecursively(obj: any): any {
	if (Array.isArray(obj)) {
		return obj.map(sortObjectPropertiesRecursively);
	}
	if (obj !== undefined && obj !== null && typeof obj === 'object' && Object.keys(obj).length > 0) {
		return (
			Object.keys(obj)
				.sort()
				.reduce<Record<string, any>>((sortedObj, prop) => {
					sortedObj[prop] = sortObjectPropertiesRecursively(obj[prop]);
					return sortedObj;
				}, {}) as any
		);
	}
	return obj;
}

/**
 * Returns the `custom` entry of the cell's metadata, typed as {@link CellMetadata}.
 *
 * @param cell The cell whose metadata should be read.
 * @returns `cell.metadata.custom`, or `undefined` when the cell has no metadata.
 */
export function getCellMetadata(cell: NotebookCell | NotebookCellData) {
	return cell.metadata?.custom as CellMetadata | undefined;
}

/**
 * Builds a Jupyter code cell (source, outputs, execution count, metadata and,
 * when present, id) from a VS Code cell.
 */
function createCodeCellFromNotebookCell(cell: NotebookCellData): nbformat.ICodeCell {
	const cellMetadata = getCellMetadata(cell);
	const codeCell: any = {
		cell_type: 'code',
		execution_count: cell.executionSummary?.executionOrder ?? null,
		// Normalize Windows line endings before splitting into lines.
		source: splitMultilineString(cell.value.replace(/\r\n/g, '\n')),
		outputs: (cell.outputs || []).map(translateCellDisplayOutput),
		metadata: cellMetadata?.metadata || {} // This cannot be empty.
	};
	if (cellMetadata?.id) {
		codeCell.id = cellMetadata.id;
	}
	return codeCell;
}

/**
 * Builds a Jupyter raw cell (source, metadata and, when present, attachments
 * and id) from a VS Code cell.
 */
function createRawCellFromNotebookCell(cell: NotebookCellData): nbformat.IRawCell {
	const cellMetadata = getCellMetadata(cell);
	const rawCell: any = {
		cell_type: 'raw',
		// Normalize Windows line endings before splitting into lines.
		source: splitMultilineString(cell.value.replace(/\r\n/g, '\n')),
		metadata: cellMetadata?.metadata || {} // This cannot be empty.
	};
	if (cellMetadata?.attachments) {
		rawCell.attachments = cellMetadata.attachments;
	}
	if (cellMetadata?.id) {
		rawCell.id = cellMetadata.id;
	}
	return rawCell;
}

/**
 * Splits text into an array of lines in which every entry but the last ends
 * with `\n`. An input that is already an array is returned as is.
 *
 * @param source A string or an array of strings.
 * @returns The lines of `source`; an empty array for an empty string.
 */
function splitMultilineString(source: nbformat.MultilineString): string[] {
	if (Array.isArray(source)) {
		return source as string[];
	}
	const str = source.toString();
	if (str.length > 0) {
		// Each line should be a separate entry, but end with a \n if not last entry
		const arr = str.split('\n');
		return arr
			.map((s, i) => {
				if (i < arr.length - 1) {
					return `${s}\n`;
				}
				return s;
			})
			.filter(s => s.length > 0); // Skip last one if empty (it's the only one that could be length 0)
	}
	return [];
}

/**
 * Builds a mime bundle (mime type -> converted value) from all items of an output.
 * When several items share a mime type, the last one wins.
 */
function convertOutputItemsToMimeBundle(output: NotebookCellOutput): Record<string, any> {
	return output.items.reduce((bundle: any, item) => {
		bundle[item.mime] = convertOutputMimeToJupyterOutput(item.mime, item.data as Uint8Array);
		return bundle;
	}, {});
}

/**
 * Converts a VS Code cell output into a Jupyter output.
 *
 * The Jupyter output type is taken from `output.metadata.outputType`. When that
 * is missing or not one of the known types, the type is derived from the mime
 * types of the output items.
 *
 * @param output The VS Code output to convert.
 * @returns The equivalent Jupyter output.
 */
function translateCellDisplayOutput(output: NotebookCellOutput): JupyterOutput {
	const customMetadata = output.metadata as CellOutputMetadata | undefined;
	let result: JupyterOutput;
	// Possible some other extension added some output (do best effort to translate & save in ipynb).
	// In which case metadata might not contain `outputType`.
	const outputType = customMetadata?.outputType as nbformat.OutputType;
	switch (outputType) {
		case 'error': {
			result = translateCellErrorOutput(output);
			break;
		}
		case 'stream': {
			result = convertStreamOutput(output);
			break;
		}
		case 'display_data': {
			result = {
				output_type: 'display_data',
				data: convertOutputItemsToMimeBundle(output),
				metadata: customMetadata?.metadata || {} // This can never be undefined.
			};
			break;
		}
		case 'execute_result': {
			result = {
				output_type: 'execute_result',
				data: convertOutputItemsToMimeBundle(output),
				metadata: customMetadata?.metadata || {}, // This can never be undefined.
				execution_count:
					typeof customMetadata?.executionCount === 'number' ? customMetadata?.executionCount : null // This can never be undefined, only a number or `null`.
			};
			break;
		}
		case 'update_display_data': {
			result = {
				output_type: 'update_display_data',
				data: convertOutputItemsToMimeBundle(output),
				metadata: customMetadata?.metadata || {} // This can never be undefined.
			};
			break;
		}
		default: {
			const isError =
				output.items.length === 1 && output.items.every((item) => item.mime === CellOutputMimeTypes.error);
			const isStream = output.items.every(
				(item) => item.mime === CellOutputMimeTypes.stderr || item.mime === CellOutputMimeTypes.stdout
			);

			if (isError) {
				return translateCellErrorOutput(output);
			}

			// In the case of .NET & other kernels, we need to ensure we save ipynb correctly.
			// Hence if we have stream output, save the output as Jupyter `stream` else `display_data`
			// Unless we already know its an unknown output type.
			const outputType: nbformat.OutputType =
				customMetadata?.outputType || (isStream ? 'stream' : 'display_data');
			let unknownOutput: nbformat.IUnrecognizedOutput | nbformat.IDisplayData | nbformat.IStream;
			if (outputType === 'stream') {
				// If saving as `stream` ensure the mandatory properties are set.
				unknownOutput = convertStreamOutput(output);
			} else if (outputType === 'display_data') {
				// If saving as `display_data` ensure the mandatory properties are set.
				const displayData: nbformat.IDisplayData = {
					data: {},
					metadata: {},
					output_type: 'display_data'
				};
				unknownOutput = displayData;
			} else {
				unknownOutput = {
					output_type: outputType
				};
			}
			if (customMetadata?.metadata) {
				unknownOutput.metadata = customMetadata.metadata;
			}
			if (output.items.length > 0) {
				unknownOutput.data = convertOutputItemsToMimeBundle(output);
			}
			result = unknownOutput;
			break;
		}
	}

	// Account for transient data as well
	// `transient.display_id` is used to update cell output in other cells, at least thats one use case we know of.
	if (result && customMetadata && customMetadata.transient) {
		result.transient = customMetadata.transient;
	}
	return result;
}

/**
 * Converts a VS Code error output into a Jupyter `error` output.
 *
 * The first output item is expected to hold a JSON serialized `Error`. If the
 * output has no items, or the first item has no data, an error output with
 * empty fields is returned.
 *
 * @param output The VS Code output holding the error item.
 * @returns The Jupyter error output.
 * @throws SyntaxError when the first item's data is not valid JSON.
 */
function translateCellErrorOutput(output: NotebookCellOutput): nbformat.IError {
	// it should have at least one output item
	const firstItem = output.items[0];
	// Bug in VS Code. Also guards against an output without any items.
	if (!firstItem?.data) {
		return {
			output_type: 'error',
			ename: '',
			evalue: '',
			traceback: []
		};
	}
	const originalError: undefined | nbformat.IError = output.metadata?.originalError;
	const value: Error = JSON.parse(textDecoder.decode(firstItem.data));
	return {
		output_type: 'error',
		ename: value.name,
		evalue: value.message,
		// VS Code needs an `Error` object which requires a `stack` property as a string.
		// Its possible the format could change when converting from `traceback` to `string` and back again to `string`
		// When .NET stores errors in output (with their .NET kernel),
		// stack is empty, hence store the message instead of stack (so that something gets displayed in ipynb).
		traceback: originalError?.traceback || splitMultilineString(value.stack || value.message || '')
	};
}

/**
 * Derives the Jupyter stream name from the mime type of the first output item.
 *
 * @returns `'stderr'` when the first item has the stderr mime type, `'stdout'`
 * for any other first item, and `undefined` when there are no items.
 */
function getOutputStreamType(output: NotebookCellOutput): string | undefined {
	if (output.items.length > 0) {
		return output.items[0].mime === CellOutputMimeTypes.stderr ? 'stderr' : 'stdout';
	}
	return;
}

/**
 * Converts the stdout/stderr items of an output into a Jupyter `stream` output.
 *
 * The decoded item texts are treated as consecutive chunks of one stream. The
 * resulting `text` is the list of lines of their concatenation, where every
 * entry except possibly the last ends with `\n`.
 *
 * @param output The VS Code output holding the stream items.
 * @returns The Jupyter stream output; the name defaults to `stdout`.
 */
function convertStreamOutput(output: NotebookCellOutput): JupyterOutput {
	const outputs: string[] = [];
	output.items
		.filter((opit) => opit.mime === CellOutputMimeTypes.stderr || opit.mime === CellOutputMimeTypes.stdout)
		.map((opit) => textDecoder.decode(opit.data))
		.forEach(value => {
			// Ensure each line is a separate entry in an array (ending with \n).
			const lines = value.split('\n');
			// The last entry of `outputs` is a line that has not been terminated yet, so the first
			// segment of this chunk always continues it, even when that segment is empty
			// (i.e. the chunk is empty or starts with `\n`). Pushing the empty segment as a
			// new entry would introduce a line break that is not part of the stream.
			if (outputs.length) {
				outputs[outputs.length - 1] = `${outputs[outputs.length - 1]}${lines.shift() ?? ''}`;
			}
			for (const line of lines) {
				outputs.push(line);
			}
		});

	// Every entry but the last was terminated by a `\n` in the stream.
	for (let index = 0; index < (outputs.length - 1); index++) {
		outputs[index] = `${outputs[index]}\n`;
	}

	// Skip last one if empty (it's the only one that could be length 0)
	if (outputs.length && outputs[outputs.length - 1].length === 0) {
		outputs.pop();
	}

	const streamType = getOutputStreamType(output) || 'stdout';

	return {
		output_type: 'stream',
		name: streamType,
		text: outputs
	};
}

/**
 * Converts the raw bytes of an output item into the value Jupyter stores for
 * the given mime type.
 *
 * - VS Code error mime type: the parsed JSON.
 * - `text/*` and the mime types listed in `textMimeTypes`: an array of lines.
 * - `image/*` other than `image/svg+xml`: a base64 string.
 * - Mime types containing `json`: the parsed JSON (or `''` for empty content).
 * - Anything else: the decoded string.
 *
 * @param mime The mime type of the output item.
 * @param value The bytes of the output item.
 * @returns The converted value; `''` when there is no value or conversion fails.
 */
function convertOutputMimeToJupyterOutput(mime: string, value: Uint8Array) {
	if (!value) {
		return '';
	}
	try {
		if (mime === CellOutputMimeTypes.error) {
			const stringValue = textDecoder.decode(value);
			return JSON.parse(stringValue);
		} else if (mime.startsWith('text/') || textMimeTypes.includes(mime)) {
			const stringValue = textDecoder.decode(value);
			return splitMultilineString(stringValue);
		} else if (mime.startsWith('image/') && mime !== 'image/svg+xml') {
			// Images in Jupyter are stored in base64 encoded format.
			// VS Code expects bytes when rendering images.
			if (typeof Buffer !== 'undefined' && typeof Buffer.from === 'function') {
				return Buffer.from(value).toString('base64');
			} else {
				return btoa(value.reduce((s: string, b: number) => s + String.fromCharCode(b), ''));
			}
		} else if (mime.toLowerCase().includes('json')) {
			const stringValue = textDecoder.decode(value);
			return stringValue.length > 0 ? JSON.parse(stringValue) : stringValue;
		} else {
			const stringValue = textDecoder.decode(value);
			return stringValue;
		}
	} catch {
		// Best effort: an item that cannot be converted is saved as an empty string.
		return '';
	}
}

/**
 * Builds a Jupyter markdown cell (source, metadata and, when present,
 * attachments and id) from a VS Code cell.
 */
function createMarkdownCellFromNotebookCell(cell: NotebookCellData): nbformat.IMarkdownCell {
	const cellMetadata = getCellMetadata(cell);
	const markdownCell: any = {
		cell_type: 'markdown',
		// Normalize Windows line endings before splitting into lines.
		source: splitMultilineString(cell.value.replace(/\r\n/g, '\n')),
		metadata: cellMetadata?.metadata || {} // This cannot be empty.
	};
	if (cellMetadata?.attachments) {
		markdownCell.attachments = cellMetadata.attachments;
	}
	if (cellMetadata?.id) {
		markdownCell.id = cellMetadata.id;
	}
	return markdownCell;
}

/**
 * Returns a cleaned-up copy of a Jupyter cell: the source is converted to an
 * array of lines, non-code cells lose `outputs` and `execution_count`, and the
 * outputs of code cells are reduced to their allowed keys.
 *
 * @param cell The cell to clean; it is not modified.
 * @returns The cleaned copy.
 */
export function pruneCell(cell: nbformat.ICell): nbformat.ICell {
	// Source is usually a single string on input. Convert back to an array
	const result = {
		...cell,
		source: splitMultilineString(cell.source)
	} as nbformat.ICell;

	// Remove outputs and execution_count from non code cells
	if (result.cell_type !== 'code') {
		delete (result as any).outputs;
		delete (result as any).execution_count;
	} else {
		// Clean outputs from code cells
		result.outputs = result.outputs ? (result.outputs as nbformat.IOutput[]).map(fixupOutput) : [];
	}

	return result;
}

// Template objects: only their keys are used, to build the sets of keys that
// are kept for each output type (see `AllowedCellOutputKeys`).
const dummyStreamObj: nbformat.IStream = {
	output_type: 'stream',
	name: 'stdout',
	text: ''
};
const dummyErrorObj: nbformat.IError = {
	output_type: 'error',
	ename: '',
	evalue: '',
	traceback: ['']
};
const dummyDisplayObj: nbformat.IDisplayData = {
	output_type: 'display_data',
	data: {},
	metadata: {}
};
const dummyExecuteResultObj: nbformat.IExecuteResult = {
	output_type: 'execute_result',
	name: '',
	execution_count: 0,
	data: {},
	metadata: {}
};

/** Keys that `fixupOutput` keeps, per Jupyter output type. */
const AllowedCellOutputKeys = {
	['stream']: new Set(Object.keys(dummyStreamObj)),
	['error']: new Set(Object.keys(dummyErrorObj)),
	['display_data']: new Set(Object.keys(dummyDisplayObj)),
	['execute_result']: new Set(Object.keys(dummyExecuteResultObj))
};

/**
 * Returns a copy of the output without the keys that are not allowed for its
 * output type. Outputs of any other type are returned unchanged (same object).
 */
function fixupOutput(output: nbformat.IOutput): nbformat.IOutput {
	let allowedKeys: Set<string>;
	switch (output.output_type) {
		case 'stream':
		case 'error':
		case 'execute_result':
		case 'display_data':
			allowedKeys = AllowedCellOutputKeys[output.output_type];
			break;
		default:
			return output;
	}
	const result = { ...output };
	for (const k of Object.keys(output)) {
		if (!allowedKeys.has(k)) {
			delete result[k];
		}
	}
	return result;
}