初始化环境文件

This commit is contained in:
CN-JS-HuiBai
2026-04-04 12:49:09 +08:00
parent 07742d2688
commit c607af6fac
5971 changed files with 515160 additions and 18 deletions

View File

@@ -0,0 +1,410 @@
import { isName } from '../util.js';
/**
 * Scans an XML `<!DOCTYPE ...>` declaration and collects the internal general
 * entities declared in its internal subset (`[ ... ]`).
 *
 * ELEMENT and NOTATION declarations are parsed only far enough to find where
 * they end. ATTLIST declarations are skipped by `readDocType` without being
 * parsed; `readAttlistExp` exists but is not called from it.
 */
export default class DocTypeReader {
    /**
     * @param {object} [options] - Entity limits. This class reads `enabled`,
     *   `maxEntityCount` and `maxEntitySize` from it. When falsy, validation
     *   errors are suppressed and no limits are enforced.
     */
    constructor(options) {
        this.suppressValidationErr = !options;
        this.options = options;
    }

    /**
     * Reads a DOCTYPE declaration and returns the entities it declares.
     *
     * @param {string} xmlData - Complete XML text.
     * @param {number} i - Index of the `<` that starts `<!DOCTYPE`.
     * @returns {{entities: object, i: number}} `entities` is a prototype-less
     *   map from entity name to `{ regx, val }`; `i` is the index of the `>`
     *   that closes the DOCTYPE.
     * @throws {Error} On a malformed or unclosed DOCTYPE, or when the number of
     *   entities exceeds `options.maxEntityCount`.
     */
    readDocType(xmlData, i) {
        // No prototype, so entity names such as "constructor" cannot collide
        // with inherited properties.
        const entities = Object.create(null);
        let entityCount = 0;
        if (xmlData[i + 3] === 'O' &&
            xmlData[i + 4] === 'C' &&
            xmlData[i + 5] === 'T' &&
            xmlData[i + 6] === 'Y' &&
            xmlData[i + 7] === 'P' &&
            xmlData[i + 8] === 'E') {
            i = i + 9;
            // Counts unmatched '<'; starts at 1 for the '<' of <!DOCTYPE itself.
            let angleBracketsCount = 1;
            let hasBody = false;
            let comment = false;
            for (; i < xmlData.length; i++) {
                if (xmlData[i] === '<' && !comment) { //Determine the tag type
                    if (hasBody && hasSeq(xmlData, "!ENTITY", i)) {
                        i += 7;
                        let entityName, val;
                        [entityName, val, i] = this.readEntityExp(xmlData, i + 1, this.suppressValidationErr);
                        if (val.indexOf("&") === -1) { //Parameter entities are not supported
                            // `?.` keeps a reader built without options usable.
                            if (this.options?.enabled !== false &&
                                this.options?.maxEntityCount != null &&
                                entityCount >= this.options.maxEntityCount) {
                                throw new Error(
                                    `Entity count (${entityCount + 1}) exceeds maximum allowed (${this.options.maxEntityCount})`
                                );
                            }
                            // Escape every regex metacharacter so the name matches literally.
                            const escaped = entityName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
                            entities[entityName] = {
                                regx: RegExp(`&${escaped};`, "g"),
                                val: val
                            };
                            entityCount++;
                        }
                    }
                    else if (hasBody && hasSeq(xmlData, "!ELEMENT", i)) {
                        i += 8;//Not supported
                        const { index } = this.readElementExp(xmlData, i + 1);
                        i = index;
                    } else if (hasBody && hasSeq(xmlData, "!ATTLIST", i)) {
                        // Not supported: only the keyword is skipped; the rest of the
                        // declaration is consumed character by character by this loop.
                        i += 8;
                    } else if (hasBody && hasSeq(xmlData, "!NOTATION", i)) {
                        i += 9;//Not supported
                        const { index } = this.readNotationExp(xmlData, i + 1, this.suppressValidationErr);
                        i = index;
                    } else if (hasSeq(xmlData, "!--", i)) comment = true;
                    else throw new Error(`Invalid DOCTYPE`);
                    angleBracketsCount++;
                } else if (xmlData[i] === '>') { //Read tag content
                    if (comment) {
                        // Inside a comment only "-->" counts as a closing bracket.
                        if (xmlData[i - 1] === "-" && xmlData[i - 2] === "-") {
                            comment = false;
                            angleBracketsCount--;
                        }
                    } else {
                        angleBracketsCount--;
                    }
                    if (angleBracketsCount === 0) {
                        break;
                    }
                } else if (xmlData[i] === '[') {
                    hasBody = true;
                }
                // Any other character carries no information for this scan.
            }
            if (angleBracketsCount !== 0) {
                throw new Error(`Unclosed DOCTYPE`);
            }
        } else {
            throw new Error(`Invalid Tag instead of DOCTYPE`);
        }
        return { entities, i };
    }

    /**
     * Reads the name and quoted value of an `<!ENTITY ...>` declaration.
     *
     * Supported:   `<!ENTITY entityname "replacement text">`
     * Unsupported: external entities (`<!ENTITY ext SYSTEM "...">`) and
     *              parameter entities (`<!ENTITY % name "...">`).
     *
     * @param {string} xmlData - Complete XML text.
     * @param {number} i - Index just after the `<!ENTITY` keyword.
     * @returns {[string, string, number]} Entity name, entity value, and the
     *   index of the value's closing quote.
     * @throws {Error} On an invalid name, an unsupported construct (unless
     *   validation errors are suppressed), a missing or unterminated value,
     *   or a value longer than `options.maxEntitySize`.
     */
    readEntityExp(xmlData, i) {
        // Skip leading whitespace after <!ENTITY
        i = skipWhitespace(xmlData, i);
        // Read entity name
        const startIndex = i;
        while (i < xmlData.length && !/\s/.test(xmlData[i]) && xmlData[i] !== '"' && xmlData[i] !== "'") {
            i++;
        }
        let entityName = xmlData.substring(startIndex, i);
        validateEntityName(entityName);
        // Skip whitespace after entity name
        i = skipWhitespace(xmlData, i);
        // Check for unsupported constructs (external entities or parameter entities)
        if (!this.suppressValidationErr) {
            if (xmlData.substring(i, i + 6).toUpperCase() === "SYSTEM") {
                throw new Error("External entities are not supported");
            } else if (xmlData[i] === "%") {
                throw new Error("Parameter entities are not supported");
            }
        }
        // Read entity value (internal entity)
        let entityValue = "";
        [i, entityValue] = this.readIdentifierVal(xmlData, i, "entity");
        // Validate entity size; `?.` tolerates a reader built without options.
        if (this.options?.enabled !== false &&
            this.options?.maxEntitySize != null &&
            entityValue.length > this.options.maxEntitySize) {
            throw new Error(
                `Entity "${entityName}" size (${entityValue.length}) exceeds maximum allowed size (${this.options.maxEntitySize})`
            );
        }
        // Step back onto the closing quote; the caller's loop advances past it.
        i--;
        return [entityName, entityValue, i];
    }

    /**
     * Reads a `<!NOTATION name SYSTEM|PUBLIC ...>` declaration.
     *
     * @param {string} xmlData - Complete XML text.
     * @param {number} i - Index just after the `<!NOTATION` keyword.
     * @returns {{notationName: string, publicIdentifier: (string|null), systemIdentifier: (string|null), index: number}}
     *   `index` is one less than the position reached after the last
     *   identifier that was read.
     * @throws {Error} On an invalid name or identifier type (unless validation
     *   errors are suppressed) or a missing/unterminated quoted identifier.
     */
    readNotationExp(xmlData, i) {
        // Skip leading whitespace after <!NOTATION
        i = skipWhitespace(xmlData, i);
        // Read notation name
        const startIndex = i;
        while (i < xmlData.length && !/\s/.test(xmlData[i])) {
            i++;
        }
        let notationName = xmlData.substring(startIndex, i);
        !this.suppressValidationErr && validateEntityName(notationName);
        // Skip whitespace after notation name
        i = skipWhitespace(xmlData, i);
        // Check identifier type (SYSTEM or PUBLIC)
        const identifierType = xmlData.substring(i, i + 6).toUpperCase();
        if (!this.suppressValidationErr && identifierType !== "SYSTEM" && identifierType !== "PUBLIC") {
            throw new Error(`Expected SYSTEM or PUBLIC, found "${identifierType}"`);
        }
        i += identifierType.length;
        // Skip whitespace after identifier type
        i = skipWhitespace(xmlData, i);
        // Read public identifier (if PUBLIC)
        let publicIdentifier = null;
        let systemIdentifier = null;
        if (identifierType === "PUBLIC") {
            [i, publicIdentifier] = this.readIdentifierVal(xmlData, i, "publicIdentifier");
            // Skip whitespace after public identifier
            i = skipWhitespace(xmlData, i);
            // Optionally read system identifier
            if (xmlData[i] === '"' || xmlData[i] === "'") {
                [i, systemIdentifier] = this.readIdentifierVal(xmlData, i, "systemIdentifier");
            }
        } else if (identifierType === "SYSTEM") {
            // Read system identifier (mandatory for SYSTEM)
            [i, systemIdentifier] = this.readIdentifierVal(xmlData, i, "systemIdentifier");
            if (!this.suppressValidationErr && !systemIdentifier) {
                throw new Error("Missing mandatory system identifier for SYSTEM notation");
            }
        }
        return { notationName, publicIdentifier, systemIdentifier, index: --i };
    }

    /**
     * Reads a single- or double-quoted string starting at `i`.
     *
     * @param {string} xmlData - Complete XML text.
     * @param {number} i - Index of the opening quote.
     * @param {string} type - Label used in the "Unterminated ..." error message.
     * @returns {[number, string]} Index just after the closing quote, and the
     *   text between the quotes.
     * @throws {Error} If `xmlData[i]` is not a quote or the closing quote is missing.
     */
    readIdentifierVal(xmlData, i, type) {
        let identifierVal = "";
        const startChar = xmlData[i];
        if (startChar !== '"' && startChar !== "'") {
            throw new Error(`Expected quoted string, found "${startChar}"`);
        }
        i++;
        const startIndex = i;
        while (i < xmlData.length && xmlData[i] !== startChar) {
            i++;
        }
        identifierVal = xmlData.substring(startIndex, i);
        if (xmlData[i] !== startChar) {
            throw new Error(`Unterminated ${type} value`);
        }
        i++;
        return [i, identifierVal];
    }

    /**
     * Reads an `<!ELEMENT ...>` declaration. Recognised forms:
     *
     *   <!ELEMENT br EMPTY>
     *   <!ELEMENT div ANY>
     *   <!ELEMENT title (#PCDATA)>
     *   <!ELEMENT book (title, author+)>
     *
     * @param {string} xmlData - Complete XML text.
     * @param {number} i - Index just after the `<!ELEMENT` keyword.
     * @returns {{elementName: string, contentModel: string, index: number}}
     *   `contentModel` is the trimmed text between the parentheses (empty for
     *   EMPTY/ANY); `index` is the last index consumed.
     * @throws {Error} On an invalid name or expression (unless validation
     *   errors are suppressed) or an unterminated content model.
     */
    readElementExp(xmlData, i) {
        // Skip leading whitespace after <!ELEMENT
        i = skipWhitespace(xmlData, i);
        // Read element name
        const startIndex = i;
        while (i < xmlData.length && !/\s/.test(xmlData[i])) {
            i++;
        }
        let elementName = xmlData.substring(startIndex, i);
        // Validate element name
        if (!this.suppressValidationErr && !isName(elementName)) {
            throw new Error(`Invalid element name: "${elementName}"`);
        }
        // Skip whitespace after element name
        i = skipWhitespace(xmlData, i);
        let contentModel = "";
        // hasSeq compares from i + 1, hence the keyword minus its first letter.
        if (xmlData[i] === "E" && hasSeq(xmlData, "MPTY", i)) i += 4;
        else if (xmlData[i] === "A" && hasSeq(xmlData, "NY", i)) i += 2;
        else if (xmlData[i] === "(") {
            i++; // Move past '('
            // Read content model up to the first ')'
            const modelStart = i;
            while (i < xmlData.length && xmlData[i] !== ")") {
                i++;
            }
            contentModel = xmlData.substring(modelStart, i);
            if (xmlData[i] !== ")") {
                throw new Error("Unterminated content model");
            }
        } else if (!this.suppressValidationErr) {
            throw new Error(`Invalid Element Expression, found "${xmlData[i]}"`);
        }
        return {
            elementName,
            contentModel: contentModel.trim(),
            index: i
        };
    }

    /**
     * Reads one attribute definition of an `<!ATTLIST ...>` declaration:
     * element name, attribute name, attribute type and default declaration.
     *
     * @param {string} xmlData - Complete XML text.
     * @param {number} i - Index just after the `<!ATTLIST` keyword.
     * @returns {{elementName: string, attributeName: string, attributeType: string, defaultValue: string, index: number}}
     *   `index` is the position just after the default declaration.
     * @throws {Error} On invalid names, an unknown simple type (unless
     *   validation errors are suppressed), a malformed NOTATION list, or a
     *   default that is neither `#REQUIRED`, `#IMPLIED` nor a quoted string.
     */
    readAttlistExp(xmlData, i) {
        // Skip leading whitespace after <!ATTLIST
        i = skipWhitespace(xmlData, i);
        // Read element name
        let startIndex = i;
        while (i < xmlData.length && !/\s/.test(xmlData[i])) {
            i++;
        }
        let elementName = xmlData.substring(startIndex, i);
        // Validate element name
        validateEntityName(elementName);
        // Skip whitespace after element name
        i = skipWhitespace(xmlData, i);
        // Read attribute name
        startIndex = i;
        while (i < xmlData.length && !/\s/.test(xmlData[i])) {
            i++;
        }
        let attributeName = xmlData.substring(startIndex, i);
        // Validate attribute name
        if (!validateEntityName(attributeName)) {
            throw new Error(`Invalid attribute name: "${attributeName}"`);
        }
        // Skip whitespace after attribute name
        i = skipWhitespace(xmlData, i);
        // Read attribute type
        let attributeType = "";
        if (xmlData.substring(i, i + 8).toUpperCase() === "NOTATION") {
            attributeType = "NOTATION";
            i += 8; // Move past "NOTATION"
            // Skip whitespace after "NOTATION"
            i = skipWhitespace(xmlData, i);
            // Expect '(' to start the list of notations
            if (xmlData[i] !== "(") {
                throw new Error(`Expected '(', found "${xmlData[i]}"`);
            }
            i++; // Move past '('
            // Read the list of allowed notations
            let allowedNotations = [];
            while (i < xmlData.length && xmlData[i] !== ")") {
                const notationStart = i;
                while (i < xmlData.length && xmlData[i] !== "|" && xmlData[i] !== ")") {
                    i++;
                }
                let notation = xmlData.substring(notationStart, i);
                // Validate notation name
                notation = notation.trim();
                if (!validateEntityName(notation)) {
                    throw new Error(`Invalid notation name: "${notation}"`);
                }
                allowedNotations.push(notation);
                // Skip '|' separator or exit loop
                if (xmlData[i] === "|") {
                    i++; // Move past '|'
                    i = skipWhitespace(xmlData, i); // Skip optional whitespace after '|'
                }
            }
            if (xmlData[i] !== ")") {
                throw new Error("Unterminated list of notations");
            }
            i++; // Move past ')'
            // Store the allowed notations as part of the attribute type
            attributeType += " (" + allowedNotations.join("|") + ")";
        } else {
            // Handle simple types (e.g., CDATA, ID, IDREF, etc.)
            const typeStart = i;
            while (i < xmlData.length && !/\s/.test(xmlData[i])) {
                i++;
            }
            attributeType += xmlData.substring(typeStart, i);
            // Validate simple attribute type
            const validTypes = ["CDATA", "ID", "IDREF", "IDREFS", "ENTITY", "ENTITIES", "NMTOKEN", "NMTOKENS"];
            if (!this.suppressValidationErr && !validTypes.includes(attributeType.toUpperCase())) {
                throw new Error(`Invalid attribute type: "${attributeType}"`);
            }
        }
        // Skip whitespace after attribute type
        i = skipWhitespace(xmlData, i);
        // Read default value. The slice length is taken from the keyword itself
        // so the comparison can never drift out of sync with the literal.
        const REQUIRED = "#REQUIRED";
        const IMPLIED = "#IMPLIED";
        let defaultValue = "";
        if (xmlData.substring(i, i + REQUIRED.length).toUpperCase() === REQUIRED) {
            defaultValue = REQUIRED;
            i += REQUIRED.length;
        } else if (xmlData.substring(i, i + IMPLIED.length).toUpperCase() === IMPLIED) {
            defaultValue = IMPLIED;
            i += IMPLIED.length;
        } else {
            [i, defaultValue] = this.readIdentifierVal(xmlData, i, "ATTLIST");
        }
        return {
            elementName,
            attributeName,
            attributeType,
            defaultValue,
            index: i
        };
    }
}
/**
 * Returns the first index at or after `index` whose character is not
 * whitespace (as matched by `\s`), or the end position when only whitespace
 * remains. An `index` already at or past the end is returned unchanged.
 *
 * @param {string} data - Text being scanned.
 * @param {number} index - Position to start from.
 * @returns {number} Position of the next non-whitespace character.
 */
const skipWhitespace = (data, index) => {
    let cursor = index;
    for (; cursor < data.length; cursor += 1) {
        if (!/\s/.test(data[cursor])) {
            break;
        }
    }
    return cursor;
};
/**
 * Tells whether `seq` occurs in `data` starting at the position right AFTER
 * index `i` (i.e. at `i + 1`). An empty `seq` always matches.
 *
 * @param {string} data - Text being scanned.
 * @param {string} seq - Character sequence to look for.
 * @param {number} i - Index of the character preceding the expected sequence.
 * @returns {boolean} True when every character of `seq` matches.
 */
function hasSeq(data, seq, i) {
    let offset = 0;
    while (offset < seq.length) {
        if (data[i + offset + 1] !== seq[offset]) {
            return false;
        }
        offset += 1;
    }
    return true;
}
/**
 * Checks a name with `isName` and hands it back when it is acceptable.
 *
 * @param {string} name - Candidate name.
 * @returns {string} The same `name`.
 * @throws {Error} When `isName(name)` is falsy.
 */
function validateEntityName(name) {
    if (!isName(name)) {
        throw new Error(`Invalid entity name ${name}`);
    }
    return name;
}

View File

@@ -0,0 +1,159 @@
import { DANGEROUS_PROPERTY_NAMES, criticalProperties } from "../util.js";
/**
 * Default handler for property names listed in DANGEROUS_PROPERTY_NAMES:
 * such names get a "__" prefix, every other name is returned untouched.
 *
 * @param {string} name - Property name about to be used as an object key.
 * @returns {string} The original or the prefixed name.
 */
const defaultOnDangerousProperty = (name) =>
    DANGEROUS_PROPERTY_NAMES.includes(name) ? "__" + name : name;
/**
 * Default parser options. `buildOptions` shallow-copies these and overlays the
 * caller's options on top, so nested values here (arrays, objects, functions)
 * are shared by reference between every built options object.
 */
export const defaultOptions = {
preserveOrder: false,
// Prepended to every attribute name before it is stored.
attributeNamePrefix: '@_',
// When set to a string, parsed attributes are grouped under this single key.
attributesGroupName: false,
// Key under which a tag's text content is stored.
textNodeName: '#text',
// `true` disables attribute parsing altogether.
ignoreAttributes: true,
removeNSPrefix: false, // remove NS from tag name or attribute name if true
allowBooleanAttributes: false, //a tag can have attributes without any value
//ignoreRootElement : false,
parseTagValue: true,
parseAttributeValue: false,
trimValues: true, //Trim string values of tag and attributes
// When set, CDATA content is stored under this key instead of the text node.
cdataPropName: false,
// Passed along when tag/attribute values are parsed.
// NOTE(review): exact meaning of each flag is defined by the number parser — confirm there.
numberParseOptions: {
hex: true,
leadingZeros: true,
eNotation: true
},
// Identity by default: returning the value unchanged lets normal value parsing proceed.
tagValueProcessor: function (tagName, val) {
return val;
},
// Identity by default, same contract as tagValueProcessor but for attribute values.
attributeValueProcessor: function (attrName, val) {
return val;
},
stopNodes: [], //nested tags will not be parsed even for errors
alwaysCreateTextNode: false,
isArray: () => false,
// When set, comments are kept and stored under this key; otherwise they are dropped.
commentPropName: false,
// Tag names that never have a closing tag.
unpairedTags: [],
// Boolean or limits object; `buildOptions` always normalizes it to an object.
processEntities: true,
htmlEntities: false,
// Skip the `<?xml ...?>` declaration / all processing instructions respectively.
ignoreDeclaration: false,
ignorePiTags: false,
transformTagName: false,
transformAttributeName: false,
updateTag: function (tagName, jPath, attrs) {
return tagName
},
// skipEmptyListItem: false
captureMetaData: false,
// Upper bound on the depth of the open-tag stack; exceeding it throws.
maxNestedTags: 100,
// When true, a tag named like one of the reserved option keys
// (comment/cdata/text/attributes-group names) is rejected with an error.
strictReservedNames: true,
jPath: true, // if true, pass jPath string to callbacks; if false, pass matcher instance
onDangerousProperty: defaultOnDangerousProperty
};
/**
 * Guards option values that end up being used as object keys.
 *
 * The comparison is case-insensitive and is made against both
 * DANGEROUS_PROPERTY_NAMES and criticalProperties. Values that are not
 * strings (for example `false`, meaning "option disabled") are ignored.
 *
 * @param {string} propertyName - The property name to validate
 * @param {string} optionName - The option field name (for error message)
 * @returns {void}
 * @throws {Error} If the name matches an entry of either list
 */
function validatePropertyName(propertyName, optionName) {
    if (typeof propertyName !== 'string') return;

    const lowered = propertyName.toLowerCase();
    const listedIn = (names) => names.some((reserved) => lowered === reserved.toLowerCase());

    if (listedIn(DANGEROUS_PROPERTY_NAMES) || listedIn(criticalProperties)) {
        throw new Error(
            `[SECURITY] Invalid ${optionName}: "${propertyName}" is a reserved JavaScript keyword that could cause prototype pollution`
        );
    }
}
/**
 * Turns the `processEntities` option into its object form.
 *
 * - object: `enabled` is true unless explicitly `false`; each numeric limit
 *   falls back to its default when null/undefined and is clamped to at least
 *   1; `allowedTags` / `tagFilter` default to `null`.
 * - boolean: becomes `enabled`, with every limit at its default.
 * - anything else (including `null`): same as `true`.
 *
 * @param {boolean|object} value
 * @returns {object} Always returns normalized object
 */
function normalizeProcessEntities(value) {
    if (typeof value === 'object' && value !== null) {
        const atLeastOne = (given, fallback) => Math.max(1, given ?? fallback);
        return {
            enabled: value.enabled !== false,
            maxEntitySize: atLeastOne(value.maxEntitySize, 10000),
            maxExpansionDepth: atLeastOne(value.maxExpansionDepth, 10),
            maxTotalExpansions: atLeastOne(value.maxTotalExpansions, 1000),
            maxExpandedLength: atLeastOne(value.maxExpandedLength, 100000),
            maxEntityCount: atLeastOne(value.maxEntityCount, 100),
            allowedTags: value.allowedTags ?? null,
            tagFilter: value.tagFilter ?? null
        };
    }

    // Booleans are kept for backward compatibility; any other type means "on".
    const enabled = typeof value === 'boolean' ? value : true;
    return {
        enabled,
        maxEntitySize: 10000,
        maxExpansionDepth: 10,
        maxTotalExpansions: 1000,
        maxExpandedLength: 100000,
        maxEntityCount: 100,
        allowedTags: null,
        tagFilter: null
    };
}
/**
 * Builds the effective parser options from the caller's options.
 *
 * Steps, in order:
 *  1. shallow-merge `options` over `defaultOptions` into a new object;
 *  2. reject reserved names in the options that are used as object keys;
 *  3. restore the default `onDangerousProperty` handler when none is set;
 *  4. normalize `processEntities` to its object form;
 *  5. rewrite legacy `*.tag` stop-node patterns to `..tag`.
 *
 * @param {object} [options] - Caller-supplied options.
 * @returns {object} A new options object; `options` itself is not modified.
 * @throws {Error} If a property-name option uses a reserved name.
 */
export const buildOptions = function (options) {
    const built = Object.assign({}, defaultOptions, options);
    // Validate property names to prevent prototype pollution
    const propertyNameOptions = [
        { value: built.attributeNamePrefix, name: 'attributeNamePrefix' },
        { value: built.attributesGroupName, name: 'attributesGroupName' },
        { value: built.textNodeName, name: 'textNodeName' },
        { value: built.cdataPropName, name: 'cdataPropName' },
        { value: built.commentPropName, name: 'commentPropName' }
    ];
    for (const { value, name } of propertyNameOptions) {
        // Falsy values (false / '') mean the option is disabled or empty.
        if (value) {
            validatePropertyName(value, name);
        }
    }
    // `== null` also catches an explicit `undefined`, which Object.assign
    // copies over the default just like `null`.
    if (built.onDangerousProperty == null) {
        built.onDangerousProperty = defaultOnDangerousProperty;
    }
    // Always normalize processEntities for backward compatibility and validation
    built.processEntities = normalizeProcessEntities(built.processEntities);
    // Convert old-style stopNodes for backward compatibility.
    // `map` returns a fresh array, so the caller's / default array is untouched.
    if (Array.isArray(built.stopNodes)) {
        built.stopNodes = built.stopNodes.map((node) => {
            if (typeof node === 'string' && node.startsWith('*.')) {
                // Old syntax: *.tagname meant "tagname anywhere"
                // Convert to new syntax: ..tagname
                return '..' + node.substring(2);
            }
            return node;
        });
    }
    return built;
};

View File

@@ -0,0 +1,911 @@
'use strict';
///@ts-check
import { getAllMatches, isExist, DANGEROUS_PROPERTY_NAMES, criticalProperties } from '../util.js';
import xmlNode from './xmlNode.js';
import DocTypeReader from './DocTypeReader.js';
import toNumber from "strnum";
import getIgnoreAttributesFn from "../ignoreAttributes.js";
import { Expression, Matcher } from 'path-expression-matcher';
// const regx =
// '<((!\\[CDATA\\[([\\s\\S]*?)(]]>))|((NAME:)?(NAME))([^>]*)>|((\\/)(NAME)\\s*>))([^<]*)'
// .replace(/NAME/g, util.nameRegexp);
//const tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(\\s*"+cdataRegx+")*([^<]+)?","g");
//const tagsRegx = new RegExp("<(\\/?)((\\w*:)?([\\w:\\-\._]+))([^>]*)>([^<]*)("+cdataRegx+"([^<]*))*([^<]+)?","g");
// Helper functions for attribute and namespace handling
/**
 * Builds a copy of an attribute map whose keys have the configured attribute
 * prefix stripped. Keys that do not start with the prefix are copied as-is.
 * When `options.attributesGroupName` is set, the attributes are looked up
 * under that key of `prefixedAttrs` first.
 *
 * @param {object} prefixedAttrs - Attributes with prefix from buildAttributesMap
 * @param {object} options - Parser options containing attributeNamePrefix
 * @returns {object} Raw attributes for matcher (empty object when there are none)
 */
function extractRawAttributes(prefixedAttrs, options) {
    const rawAttrs = {};
    if (!prefixedAttrs) return rawAttrs;

    const groupKey = options.attributesGroupName;
    const source = groupKey ? prefixedAttrs[groupKey] : prefixedAttrs;
    if (!source) return rawAttrs;

    const prefix = options.attributeNamePrefix;
    for (const key in source) {
        const rawName = key.startsWith(prefix) ? key.substring(prefix.length) : key;
        rawAttrs[rawName] = source[key];
    }
    return rawAttrs;
}
/**
 * Returns the namespace prefix of a tag name, i.e. the text before its first
 * colon. No prefix is reported when the name is empty or not a string, has no
 * colon, starts with a colon, or when the prefix is exactly "xmlns".
 *
 * @param {string} rawTagName - Tag name possibly with namespace (e.g., "soap:Envelope")
 * @returns {string|undefined} Namespace or undefined
 */
function extractNamespace(rawTagName) {
    if (typeof rawTagName !== 'string' || rawTagName === '') return undefined;

    const separatorAt = rawTagName.indexOf(':');
    if (separatorAt <= 0) return undefined;

    const prefix = rawTagName.substring(0, separatorAt);
    // Don't treat xmlns as a namespace
    return prefix === 'xmlns' ? undefined : prefix;
}
/**
 * Holds the state of one XML parser: options, the open-tag stack, entity
 * tables, expansion counters and the path matcher. The parsing routines are
 * module-level functions attached to each instance by the constructor.
 */
export default class OrderedObjParser {
    /**
     * @param {object} options - Parser options. The constructor itself reads
     *   `ignoreAttributes` and `stopNodes`.
     */
    constructor(options) {
        this.options = options;
        this.currentNode = null;
        this.tagsNodeStack = [];
        this.docTypeEntities = {};

        // Entity tables. Each entry pairs a global regex with its replacement.
        this.lastEntities = {
            "apos": { regex: /&(apos|#39|#x27);/g, val: "'" },
            "gt": { regex: /&(gt|#62|#x3E);/g, val: ">" },
            "lt": { regex: /&(lt|#60|#x3C);/g, val: "<" },
            "quot": { regex: /&(quot|#34|#x22);/g, val: "\"" },
        };
        this.ampEntity = { regex: /&(amp|#38|#x26);/g, val: "&" };
        // lt / gt / amp / quot / apos are deliberately absent from this table:
        // they are covered by `lastEntities` and `ampEntity` above.
        this.htmlEntities = {
            "space": { regex: /&(nbsp|#160);/g, val: " " },
            "cent": { regex: /&(cent|#162);/g, val: "¢" },
            "pound": { regex: /&(pound|#163);/g, val: "£" },
            "yen": { regex: /&(yen|#165);/g, val: "¥" },
            "euro": { regex: /&(euro|#8364);/g, val: "€" },
            "copyright": { regex: /&(copy|#169);/g, val: "©" },
            "reg": { regex: /&(reg|#174);/g, val: "®" },
            "inr": { regex: /&(inr|#8377);/g, val: "₹" },
            "num_dec": { regex: /&#([0-9]{1,7});/g, val: (_, str) => fromCodePoint(str, 10, "&#") },
            "num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val: (_, str) => fromCodePoint(str, 16, "&#x") },
        };

        // Attach the module-level routines so they run with this instance as `this`.
        Object.assign(this, {
            addExternalEntities,
            parseXml,
            parseTextData,
            resolveNameSpace,
            buildAttributesMap,
            isItStopNode,
            replaceEntitiesValue,
            readStopNodeData,
            saveTextToParentTag,
            addChild,
        });
        this.ignoreAttributesFn = getIgnoreAttributesFn(this.options.ignoreAttributes);

        // Entity-expansion accounting
        this.entityExpansionCount = 0;
        this.currentExpandedLength = 0;

        // Path matcher, plus the read-only view of it that is handed to user callbacks.
        this.matcher = new Matcher();
        this.readonlyMatcher = this.matcher.readOnly();

        // True while the tag being processed matched a stop-node expression.
        this.isCurrentNodeStopNode = false;

        // Pre-compile stop-node expressions. `stopNodeExpressions` stays
        // undefined when no stop nodes are configured.
        const stopNodes = this.options.stopNodes;
        if (stopNodes && stopNodes.length > 0) {
            const compiled = [];
            this.stopNodeExpressions = compiled;
            for (let idx = 0; idx < stopNodes.length; idx++) {
                const candidate = stopNodes[idx];
                if (typeof candidate === 'string') {
                    compiled.push(new Expression(candidate));
                    continue;
                }
                if (candidate instanceof Expression) {
                    compiled.push(candidate);
                }
                // Entries of any other type are ignored.
            }
        }
    }
}
/**
 * Registers user-supplied entities in `this.lastEntities`, so that
 * `&name;` is replaced by the given value.
 *
 * Every regex metacharacter in an entity name is escaped, so names such as
 * `a.b`, `a-b`, `ns:ent` or `a+b` match exactly themselves.
 *
 * @this {{lastEntities: object}}
 * @param {Object<string, *>} externalEntities - Map of entity name (without
 *   `&` and `;`) to replacement value.
 * @returns {void}
 */
function addExternalEntities(externalEntities) {
    for (const [name, val] of Object.entries(externalEntities)) {
        // "$&" re-inserts the matched character after the backslash.
        const escaped = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        this.lastEntities[name] = {
            regex: new RegExp("&" + escaped + ";", "g"),
            val
        };
    }
}
/**
 * Converts the raw text of a tag into its final value.
 *
 * The text is optionally trimmed, has its entities replaced (unless
 * `escapeEntities` is set), and is then offered to
 * `options.tagValueProcessor`:
 *  - processor returns null/undefined -> the text is returned unparsed;
 *  - processor returns something else than the text -> that value is returned;
 *  - processor returns the text itself -> it goes through `parseValue`,
 *    except that with `trimValues` off a text with surrounding whitespace is
 *    returned untouched.
 *
 * @param {string} val
 * @param {string} tagName
 * @param {string|Matcher} jPath - jPath string or Matcher instance based on options.jPath
 * @param {boolean} dontTrim
 * @param {boolean} hasAttributes
 * @param {boolean} isLeafNode
 * @param {boolean} escapeEntities
 * @returns {*} The processed value, or undefined when `val` is undefined or
 *   empty (after trimming).
 */
function parseTextData(val, tagName, jPath, dontTrim, hasAttributes, isLeafNode, escapeEntities) {
    if (val === undefined) return undefined;

    if (this.options.trimValues && !dontTrim) {
        val = val.trim();
    }
    if (!(val.length > 0)) return undefined;

    if (!escapeEntities) {
        val = this.replaceEntitiesValue(val, tagName, jPath);
    }

    // Pass jPath string or matcher based on options.jPath setting
    const pathForCallback = this.options.jPath ? jPath.toString() : jPath;
    const processed = this.options.tagValueProcessor(tagName, val, pathForCallback, hasAttributes, isLeafNode);

    if (processed === null || processed === undefined) {
        // The processor opted out: keep the text as it is.
        return val;
    }
    if (processed !== val) {
        // The processor supplied a replacement value.
        return processed;
    }
    if (this.options.trimValues || val.trim() === val) {
        return parseValue(val, this.options.parseTagValue, this.options.numberParseOptions);
    }
    return val;
}
/**
 * Strips the namespace prefix from a tag or attribute name when
 * `options.removeNSPrefix` is on.
 *
 * - Names whose first colon-separated part is `xmlns` resolve to ''.
 * - Names with exactly one colon lose the prefix; a leading '/' is kept.
 * - Everything else is returned unchanged.
 *
 * @param {string} tagname - Name as written in the document.
 * @returns {string} The resolved name.
 */
function resolveNameSpace(tagname) {
    if (!this.options.removeNSPrefix) return tagname;

    const parts = tagname.split(':');
    if (parts[0] === 'xmlns') return '';
    if (parts.length !== 2) return tagname;

    const slash = tagname.charAt(0) === '/' ? '/' : '';
    return slash + parts[1];
}
//TODO: change regex to capture NS
// Matches one attribute inside a tag expression.
//   group 1: attribute name (anything up to whitespace or '=')
//   group 2: the optional `= "value"` part as a whole
//   group 3: the quote character, which must also close the value
//   group 4: the value between the quotes (undefined for value-less attributes)
const attrsRegx = /([^\s=]+)\s*(=\s*(['"])([\s\S]*?)\3)?/gm;
/**
 * Parses the attribute part of a tag expression into an attribute map.
 *
 * Works in two passes over the regex matches:
 *  1. collect every attribute that has a value (trimmed if `trimValues`,
 *     entities replaced) under its un-prefixed name and hand that map to
 *     `jPath.updateCurrent`, when `jPath` is an object offering it;
 *  2. build the result map: skip attributes rejected by `ignoreAttributesFn`,
 *     apply prefix / `transformAttributeName` / `sanitizeName` to the name and
 *     `attributeValueProcessor` / `parseValue` to the value.
 *
 * Note that entity replacement runs in both passes, i.e. twice per attribute
 * value.
 *
 * @param {string} attrStr - Tag expression text to scan for attributes.
 * @param {object|string} jPath - Matcher (or path) of the current tag.
 * @param {string} tagName - Name of the tag owning the attributes.
 * @returns {object|undefined} The attribute map (wrapped under
 *   `attributesGroupName` when that option is set); undefined when attributes
 *   are ignored, `attrStr` is not a string, or no attribute was kept.
 */
function buildAttributesMap(attrStr, jPath, tagName) {
if (this.options.ignoreAttributes !== true && typeof attrStr === 'string') {
// attrStr = attrStr.replace(/\r?\n/g, ' ');
//attrStr = attrStr || attrStr.trim();
const matches = getAllMatches(attrStr, attrsRegx);
const len = matches.length; //don't make it inline
const attrs = {};
// First pass: parse all attributes and update matcher with raw values
// This ensures the matcher has all attribute values when processors run
const rawAttrsForMatcher = {};
for (let i = 0; i < len; i++) {
// Per attrsRegx: match[1] is the attribute name, match[4] the quoted value.
const attrName = this.resolveNameSpace(matches[i][1]);
const oldVal = matches[i][4];
// Value-less (boolean) attributes are not reported to the matcher.
if (attrName.length && oldVal !== undefined) {
let parsedVal = oldVal;
if (this.options.trimValues) {
parsedVal = parsedVal.trim();
}
parsedVal = this.replaceEntitiesValue(parsedVal, tagName, this.readonlyMatcher);
rawAttrsForMatcher[attrName] = parsedVal;
}
}
// Update matcher with raw attribute values BEFORE running processors
if (Object.keys(rawAttrsForMatcher).length > 0 && typeof jPath === 'object' && jPath.updateCurrent) {
jPath.updateCurrent(rawAttrsForMatcher);
}
// Second pass: now process attributes with matcher having full attribute context
for (let i = 0; i < len; i++) {
const attrName = this.resolveNameSpace(matches[i][1]);
// Convert jPath to string if needed for ignoreAttributesFn
const jPathStr = this.options.jPath ? jPath.toString() : this.readonlyMatcher;
if (this.ignoreAttributesFn(attrName, jPathStr)) {
continue
}
let oldVal = matches[i][4];
let aName = this.options.attributeNamePrefix + attrName;
// An empty resolved name (e.g. an xmlns declaration with removeNSPrefix) is dropped.
if (attrName.length) {
if (this.options.transformAttributeName) {
// The transformer receives the already prefixed name.
aName = this.options.transformAttributeName(aName);
}
//if (aName === "__proto__") aName = "#__proto__";
aName = sanitizeName(aName, this.options);
if (oldVal !== undefined) {
if (this.options.trimValues) {
oldVal = oldVal.trim();
}
oldVal = this.replaceEntitiesValue(oldVal, tagName, this.readonlyMatcher);
// Pass jPath string or readonlyMatcher based on options.jPath setting
const jPathOrMatcher = this.options.jPath ? jPath.toString() : this.readonlyMatcher;
// The processor gets the un-prefixed attribute name.
const newVal = this.options.attributeValueProcessor(attrName, oldVal, jPathOrMatcher);
if (newVal === null || newVal === undefined) {
//don't parse
attrs[aName] = oldVal;
} else if (typeof newVal !== typeof oldVal || newVal !== oldVal) {
//overwrite
attrs[aName] = newVal;
} else {
//parse
attrs[aName] = parseValue(
oldVal,
this.options.parseAttributeValue,
this.options.numberParseOptions
);
}
} else if (this.options.allowBooleanAttributes) {
// Attribute written without a value.
attrs[aName] = true;
}
}
}
// Nothing kept: report "no attributes" as undefined rather than {}.
if (!Object.keys(attrs).length) {
return;
}
if (this.options.attributesGroupName) {
const attrCollection = {};
attrCollection[this.options.attributesGroupName] = attrs;
return attrCollection;
}
return attrs
}
}
/**
 * Parses an XML string into a tree of xmlNode objects.
 *
 * The text is scanned character by character. Every '<' is dispatched to one
 * of six branches: closing tag, processing instruction, comment, DOCTYPE,
 * CDATA, or opening tag. Any other character is accumulated as text of the
 * current node. Open tags are tracked on `this.tagsNodeStack` and mirrored on
 * `this.matcher`.
 *
 * @this {OrderedObjParser}
 * @param {string} xmlData - XML text to parse.
 * @returns {Array} Children of the synthetic '!xml' root node.
 * @throws {Error} On unclosed constructs, reserved or unpaired tag misuse,
 *   or when the nesting limit (`options.maxNestedTags`) is exceeded.
 */
const parseXml = function (xmlData) {
// Normalize CRLF and lone CR line endings to LF.
xmlData = xmlData.replace(/\r\n?/g, "\n"); //TODO: remove this line
// Synthetic root; only its children are returned.
const xmlObj = new xmlNode('!xml');
let currentNode = xmlObj;
let textData = "";
// Reset matcher for new document
this.matcher.reset();
// Reset entity expansion counters for this document
this.entityExpansionCount = 0;
this.currentExpandedLength = 0;
const docTypeReader = new DocTypeReader(this.options.processEntities);
for (let i = 0; i < xmlData.length; i++) {//for each char in XML data
const ch = xmlData[i];
if (ch === '<') {
// const nextIndex = i+1;
// const _2ndChar = xmlData[nextIndex];
if (xmlData[i + 1] === '/') {//Closing Tag
const closeIndex = findClosingIndex(xmlData, ">", i, "Closing Tag is not closed.")
let tagName = xmlData.substring(i + 2, closeIndex).trim();
if (this.options.removeNSPrefix) {
const colonIndex = tagName.indexOf(":");
if (colonIndex !== -1) {
tagName = tagName.substr(colonIndex + 1);
}
}
tagName = transformTagName(this.options.transformTagName, tagName, "", this.options).tagName;
// Flush the text collected since the last tag into the node being closed.
if (currentNode) {
textData = this.saveTextToParentTag(textData, currentNode, this.readonlyMatcher);
}
//check if last tag of nested tag was unpaired tag
const lastTagName = this.matcher.getCurrentTag();
if (tagName && this.options.unpairedTags.indexOf(tagName) !== -1) {
throw new Error(`Unpaired tag can not be used as closing tag: </${tagName}>`);
}
if (lastTagName && this.options.unpairedTags.indexOf(lastTagName) !== -1) {
// Pop the unpaired tag
this.matcher.pop();
this.tagsNodeStack.pop();
}
// Pop the closing tag
this.matcher.pop();
this.isCurrentNodeStopNode = false; // Reset flag when closing tag
currentNode = this.tagsNodeStack.pop();//avoid recursion, set the parent tag scope
textData = "";
i = closeIndex;
} else if (xmlData[i + 1] === '?') {
// Processing instruction / XML declaration: <? ... ?>
let tagData = readTagExp(xmlData, i, false, "?>");
if (!tagData) throw new Error("Pi Tag is not closed.");
textData = this.saveTextToParentTag(textData, currentNode, this.readonlyMatcher);
if ((this.options.ignoreDeclaration && tagData.tagName === "?xml") || this.options.ignorePiTags) {
//do nothing
} else {
const childNode = new xmlNode(tagData.tagName);
childNode.add(this.options.textNodeName, "");
if (tagData.tagName !== tagData.tagExp && tagData.attrExpPresent) {
childNode[":@"] = this.buildAttributesMap(tagData.tagExp, this.matcher, tagData.tagName);
}
this.addChild(currentNode, childNode, this.readonlyMatcher, i);
}
i = tagData.closeIndex + 1;
} else if (xmlData.substr(i + 1, 3) === '!--') {
// Comment: kept only when commentPropName is configured.
// NOTE(review): `endIndex - 2` suggests findClosingIndex returns the index of the final '>' of "-->" — confirm.
const endIndex = findClosingIndex(xmlData, "-->", i + 4, "Comment is not closed.")
if (this.options.commentPropName) {
const comment = xmlData.substring(i + 4, endIndex - 2);
textData = this.saveTextToParentTag(textData, currentNode, this.readonlyMatcher);
currentNode.add(this.options.commentPropName, [{ [this.options.textNodeName]: comment }]);
}
i = endIndex;
} else if (xmlData.substr(i + 1, 2) === '!D') {
// DOCTYPE: collect the declared entities and jump to its closing '>'.
const result = docTypeReader.readDocType(xmlData, i);
this.docTypeEntities = result.entities;
i = result.i;
} else if (xmlData.substr(i + 1, 2) === '![') {
// CDATA section: "<![CDATA[" is 9 characters long, hence `i + 9`.
const closeIndex = findClosingIndex(xmlData, "]]>", i, "CDATA is not closed.") - 2;
const tagExp = xmlData.substring(i + 9, closeIndex);
textData = this.saveTextToParentTag(textData, currentNode, this.readonlyMatcher);
// dontTrim = true and escapeEntities = true: CDATA text is neither trimmed nor entity-replaced.
let val = this.parseTextData(tagExp, currentNode.tagname, this.readonlyMatcher, true, false, true, true);
if (val == undefined) val = "";
//cdata should be set even if it is 0 length string
if (this.options.cdataPropName) {
currentNode.add(this.options.cdataPropName, [{ [this.options.textNodeName]: tagExp }]);
} else {
currentNode.add(this.options.textNodeName, val);
}
i = closeIndex + 2;
} else {//Opening tag
let result = readTagExp(xmlData, i, this.options.removeNSPrefix);
// Safety check: readTagExp can return undefined
if (!result) {
// Log context for debugging
const context = xmlData.substring(Math.max(0, i - 50), Math.min(xmlData.length, i + 50));
throw new Error(`readTagExp returned undefined at position ${i}. Context: "${context}"`);
}
let tagName = result.tagName;
const rawTagName = result.rawTagName;
let tagExp = result.tagExp;
let attrExpPresent = result.attrExpPresent;
let closeIndex = result.closeIndex;
({ tagName, tagExp } = transformTagName(this.options.transformTagName, tagName, tagExp, this.options));
// A tag must not be named like one of the keys the output reserves for
// comments, CDATA, text or the attribute group.
if (this.options.strictReservedNames &&
(tagName === this.options.commentPropName
|| tagName === this.options.cdataPropName
|| tagName === this.options.textNodeName
|| tagName === this.options.attributesGroupName
)) {
throw new Error(`Invalid tag name: ${tagName}`);
}
//save text as child node
if (currentNode && textData) {
if (currentNode.tagname !== '!xml') {
//when nested tag is found
textData = this.saveTextToParentTag(textData, currentNode, this.readonlyMatcher, false);
}
}
//check if last tag was unpaired tag
const lastTag = currentNode;
if (lastTag && this.options.unpairedTags.indexOf(lastTag.tagname) !== -1) {
currentNode = this.tagsNodeStack.pop();
this.matcher.pop();
}
// Clean up self-closing syntax BEFORE processing attributes
// This is where tagExp gets the trailing / removed
let isSelfClosing = false;
if (tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1) {
isSelfClosing = true;
if (tagName[tagName.length - 1] === "/") {
// No attributes: the '/' is glued to the name, e.g. <br/>.
tagName = tagName.substr(0, tagName.length - 1);
tagExp = tagName;
} else {
tagExp = tagExp.substr(0, tagExp.length - 1);
}
// Re-check attrExpPresent after cleaning
attrExpPresent = (tagName !== tagExp);
}
// Now process attributes with CLEAN tagExp (no trailing /)
let prefixedAttrs = null;
let rawAttrs = {};
let namespace = undefined;
// Extract namespace from rawTagName
namespace = extractNamespace(rawTagName);
// Push tag to matcher FIRST (with empty attrs for now) so callbacks see correct path
if (tagName !== xmlObj.tagname) {
this.matcher.push(tagName, {}, namespace);
}
// Now build attributes - callbacks will see correct matcher state
if (tagName !== tagExp && attrExpPresent) {
// Build attributes (returns prefixed attributes for the tree)
// Note: buildAttributesMap now internally updates the matcher with raw attributes
prefixedAttrs = this.buildAttributesMap(tagExp, this.matcher, tagName);
if (prefixedAttrs) {
// Extract raw attributes (without prefix) for our use
rawAttrs = extractRawAttributes(prefixedAttrs, this.options);
}
}
// Now check if this is a stop node (after attributes are set)
if (tagName !== xmlObj.tagname) {
this.isCurrentNodeStopNode = this.isItStopNode(this.stopNodeExpressions, this.matcher);
}
// Remember where the tag started; `i` is moved forward below.
const startIndex = i;
if (this.isCurrentNodeStopNode) {
let tagContent = "";
// For self-closing tags, content is empty
if (isSelfClosing) {
i = result.closeIndex;
}
//unpaired tag
else if (this.options.unpairedTags.indexOf(tagName) !== -1) {
i = result.closeIndex;
}
//normal tag
else {
//read until closing tag is found
const result = this.readStopNodeData(xmlData, rawTagName, closeIndex + 1);
if (!result) throw new Error(`Unexpected end of ${rawTagName}`);
i = result.i;
tagContent = result.tagContent;
}
const childNode = new xmlNode(tagName);
if (prefixedAttrs) {
childNode[":@"] = prefixedAttrs;
}
// For stop nodes, store raw content as-is without any processing
childNode.add(this.options.textNodeName, tagContent);
this.matcher.pop(); // Pop the stop node tag
this.isCurrentNodeStopNode = false; // Reset flag
this.addChild(currentNode, childNode, this.readonlyMatcher, startIndex);
} else {
//selfClosing tag
if (isSelfClosing) {
({ tagName, tagExp } = transformTagName(this.options.transformTagName, tagName, tagExp, this.options));
const childNode = new xmlNode(tagName);
if (prefixedAttrs) {
childNode[":@"] = prefixedAttrs;
}
this.addChild(currentNode, childNode, this.readonlyMatcher, startIndex);
this.matcher.pop(); // Pop self-closing tag
this.isCurrentNodeStopNode = false; // Reset flag
}
else if (this.options.unpairedTags.indexOf(tagName) !== -1) {//unpaired tag
const childNode = new xmlNode(tagName);
if (prefixedAttrs) {
childNode[":@"] = prefixedAttrs;
}
this.addChild(currentNode, childNode, this.readonlyMatcher, startIndex);
this.matcher.pop(); // Pop unpaired tag
this.isCurrentNodeStopNode = false; // Reset flag
i = result.closeIndex;
// Continue to next iteration without changing currentNode
continue;
}
//opening tag
else {
const childNode = new xmlNode(tagName);
// Depth guard: the stack holds one entry per currently open tag.
if (this.tagsNodeStack.length > this.options.maxNestedTags) {
throw new Error("Maximum nested tags exceeded");
}
this.tagsNodeStack.push(currentNode);
if (prefixedAttrs) {
childNode[":@"] = prefixedAttrs;
}
this.addChild(currentNode, childNode, this.readonlyMatcher, startIndex);
// Descend: following text and tags belong to the new node.
currentNode = childNode;
}
textData = "";
i = closeIndex;
}
}
} else {
// Plain character: part of the current node's text.
textData += xmlData[i];
}
}
return xmlObj.child;
}
/**
 * Attach `childNode` to `currentNode`, giving the `updateTag` option a chance
 * to drop or rename the node first.
 *
 * `options.updateTag` receives the tag name, the current path and the node's
 * attribute map. Returning `false` discards the node, returning a string
 * renames it, and any other value keeps it unchanged.
 *
 * @param {object} currentNode - Parent node; must expose `addChild(node, startIndex)`
 * @param {object} childNode - Node to attach; its `tagname` may be rewritten
 * @param {object} matcher - Path matcher, stringified when `options.jPath` is truthy
 * @param {number} [startIndex] - Forwarded only when `options.captureMetaData` is truthy
 */
function addChild(currentNode, childNode, matcher, startIndex) {
  const settings = this.options;
  // The position is only recorded when metadata capture was requested.
  const position = settings.captureMetaData ? startIndex : undefined;
  const pathArgument = settings.jPath ? matcher.toString() : matcher;
  const verdict = settings.updateTag(childNode.tagname, pathArgument, childNode[":@"]);

  if (verdict === false) return; // the hook vetoed this node

  if (typeof verdict === "string") {
    childNode.tagname = verdict;
  }
  currentNode.addChild(childNode, position);
}
/**
 * Expand entity references inside a text or attribute value.
 *
 * Replacement order: DOCTYPE entities, then the standard entities, then
 * (when `options.htmlEntities` is set and an `&` is still present) the HTML
 * entities, and finally the ampersand entity. Every DOCTYPE, standard and HTML
 * match is added to `this.entityExpansionCount`; growth caused by DOCTYPE
 * entities is added to `this.currentExpandedLength`.
 *
 * @param {string} val - Text in which entity references are replaced
 * @param {string} tagName - Name of the tag the text belongs to
 * @param {string|Matcher} jPath - Current path; stringified when `options.jPath` is truthy
 * @returns {string} `val` with entities expanded, or `val` untouched when entity
 *   processing is disabled or the tag is rejected by `allowedTags` / `tagFilter`
 * @throws {Error} When `maxTotalExpansions` or `maxExpandedLength` is exceeded
 */
function replaceEntitiesValue(val, tagName, jPath) {
  const entityConfig = this.options.processEntities;
  if (!entityConfig || !entityConfig.enabled) return val;

  // Check if tag is allowed to contain entities
  if (entityConfig.allowedTags) {
    const pathArgument = this.options.jPath ? jPath.toString() : jPath;
    const permitted = Array.isArray(entityConfig.allowedTags)
      ? entityConfig.allowedTags.includes(tagName)
      : entityConfig.allowedTags(tagName, pathArgument);
    if (!permitted) return val;
  }

  // Apply custom tag filter if provided
  if (entityConfig.tagFilter) {
    const pathArgument = this.options.jPath ? jPath.toString() : jPath;
    if (!entityConfig.tagFilter(tagName, pathArgument)) return val;
  }

  // Adds `hits` to the running expansion total and enforces the configured ceiling.
  const recordExpansions = (hits) => {
    this.entityExpansionCount += hits;
    if (entityConfig.maxTotalExpansions &&
      this.entityExpansionCount > entityConfig.maxTotalExpansions) {
      throw new Error(
        `Entity expansion limit exceeded: ${this.entityExpansionCount} > ${entityConfig.maxTotalExpansions}`
      );
    }
  };

  // Replace DOCTYPE entities (note: these entries carry their pattern in `regx`)
  for (const entity of Object.values(this.docTypeEntities)) {
    const found = val.match(entity.regx);
    if (!found) continue;
    recordExpansions(found.length);

    const sizeBefore = val.length;
    val = val.replace(entity.regx, entity.val);

    // The length budget is checked right after each replacement.
    if (entityConfig.maxExpandedLength) {
      this.currentExpandedLength += (val.length - sizeBefore);
      if (this.currentExpandedLength > entityConfig.maxExpandedLength) {
        throw new Error(
          `Total expanded content size exceeded: ${this.currentExpandedLength} > ${entityConfig.maxExpandedLength}`
        );
      }
    }
  }

  // Replace standard entities
  for (const entity of Object.values(this.lastEntities)) {
    const found = val.match(entity.regex);
    if (found) recordExpansions(found.length);
    val = val.replace(entity.regex, entity.val);
  }

  // Nothing left that could be an entity reference.
  if (val.indexOf('&') === -1) return val;

  // Replace HTML entities if enabled
  if (this.options.htmlEntities) {
    for (const entity of Object.values(this.htmlEntities)) {
      const found = val.match(entity.regex);
      if (found) recordExpansions(found.length);
      val = val.replace(entity.regex, entity.val);
    }
  }

  // Replace ampersand entity last
  return val.replace(this.ampEntity.regex, this.ampEntity.val);
}
/**
 * Flush the text collected so far into `parentNode` as a text node.
 *
 * The text is passed through `this.parseTextData`; the outcome is stored
 * under `options.textNodeName` unless it is `undefined` or an empty string.
 *
 * @param {string} textData - Text gathered since the last tag
 * @param {object} parentNode - Node that receives the text node
 * @param {object} matcher - Current path matcher, forwarded to `parseTextData`
 * @param {boolean} [isLeafNode] - Defaults to "the parent has no children yet"
 * @returns {string} `""` after a flush; a falsy `textData` is returned unchanged
 */
function saveTextToParentTag(textData, parentNode, matcher, isLeafNode) {
  if (!textData) return textData; // nothing collected

  const leaf = isLeafNode === undefined ? parentNode.child.length === 0 : isLeafNode;
  const ownerName = parentNode.tagname;
  const hasAttributes = parentNode[":@"]
    ? Object.keys(parentNode[":@"]).length !== 0
    : false;

  const parsed = this.parseTextData(textData, ownerName, matcher, false, hasAttributes, leaf);
  if (parsed !== undefined && parsed !== "") {
    parentNode.add(this.options.textNodeName, parsed);
  }
  return "";
}
//TODO: use jPath to simplify the logic
/**
 * Tell whether the current path matches any configured stop-node expression.
 *
 * @param {Array<Expression>} stopNodeExpressions - Array of compiled Expression objects
 * @param {Matcher} matcher - Current path matcher
 * @returns {boolean} `true` as soon as one expression matches, otherwise `false`
 */
function isItStopNode(stopNodeExpressions, matcher) {
  const nothingConfigured = !stopNodeExpressions || stopNodeExpressions.length === 0;
  if (nothingConfigured) return false;
  return stopNodeExpressions.some((expression) => matcher.matches(expression));
}
/**
 * Collect a tag expression and locate where it ends, ignoring closing
 * characters that appear inside single- or double-quoted attribute values.
 *
 * Outside quotes a tab is normalised to a space. With a two-character
 * `closingChar` (such as "?>") the expression ends only where both characters
 * appear consecutively.
 *
 * @param {string} xmlData
 * @param {number} i starting index
 * @param {string} [closingChar=">"] one- or two-character terminator
 * @returns {{data: string, index: number}|undefined} the collected expression and the
 *   index of the terminator's first character; `undefined` when no terminator is found
 */
function tagExpWithClosingIndex(xmlData, i, closingChar = ">") {
  const terminatorStart = closingChar[0];
  const terminatorNext = closingChar[1];
  const pieces = [];
  let openQuote = "";

  for (let position = i; position < xmlData.length; position++) {
    const current = xmlData[position];

    if (openQuote) {
      // Inside a quoted value everything is copied verbatim.
      if (current === openQuote) openQuote = "";
      pieces.push(current);
      continue;
    }

    if (current === '"' || current === "'") {
      openQuote = current;
      pieces.push(current);
      continue;
    }

    if (current === terminatorStart) {
      const terminated = terminatorNext ? xmlData[position + 1] === terminatorNext : true;
      if (terminated) {
        return { data: pieces.join(""), index: position };
      }
      // First terminator character without its partner: ordinary content.
      pieces.push(current);
      continue;
    }

    pieces.push(current === '\t' ? " " : current);
  }
}
/**
 * Locate `str` in `xmlData` at or after index `i`.
 *
 * @param {string} xmlData - Text to search
 * @param {string} str - Terminator to look for
 * @param {number} i - Index at which the search starts
 * @param {string} errMsg - Message of the error thrown when `str` is absent
 * @returns {number} Index of the LAST character of the located terminator
 * @throws {Error} With `errMsg` when `str` does not occur
 */
function findClosingIndex(xmlData, str, i, errMsg) {
  const foundAt = xmlData.indexOf(str, i);
  if (foundAt === -1) throw new Error(errMsg);
  return foundAt + str.length - 1;
}
/**
 * Read the tag that starts at `xmlData[i]` and split it into name and
 * attribute expression.
 *
 * @param {string} xmlData - Complete XML text
 * @param {number} i - Index of the tag's opening character; scanning starts at `i + 1`
 * @param {boolean} removeNSPrefix - When truthy, a `prefix:` is stripped from `tagName`
 * @param {string} [closingChar=">"] - Terminator handed to `tagExpWithClosingIndex`
 * @returns {{tagName: string, tagExp: string, closeIndex: number, attrExpPresent: boolean, rawTagName: string}|undefined}
 *   `undefined` when the tag is never terminated. `tagExp` holds the text after the first
 *   whitespace, or the whole expression when there is none. `rawTagName` keeps any prefix.
 */
function readTagExp(xmlData, i, removeNSPrefix, closingChar = ">") {
  const scanned = tagExpWithClosingIndex(xmlData, i + 1, closingChar);
  if (!scanned) return;

  const { data: wholeExpression, index: closeIndex } = scanned;

  // Separate tag name and attributes expression at the first whitespace.
  const firstBlank = wholeExpression.search(/\s/);
  const hasAttributePart = firstBlank !== -1;
  const rawTagName = hasAttributePart ? wholeExpression.slice(0, firstBlank) : wholeExpression;
  const tagExp = hasAttributePart
    ? wholeExpression.slice(firstBlank + 1).trimStart()
    : wholeExpression;

  let tagName = rawTagName;
  let attrExpPresent = true;
  const colonAt = removeNSPrefix ? rawTagName.indexOf(":") : -1;
  if (colonAt !== -1) {
    tagName = rawTagName.slice(colonAt + 1);
    // With the prefix gone, attributes exist only if more than the bare name remains.
    attrExpPresent = tagName !== wholeExpression.slice(colonAt + 1);
  }

  return { tagName, tagExp, closeIndex, attrExpPresent, rawTagName };
}
/**
 * Find the closing tag that pairs with an already-opened stop node and return
 * everything in between, untouched.
 *
 * Nested elements with the same name are counted, so the close tag that is
 * reported is the one balancing the original open tag. Processing
 * instructions, comments and `<![ ... ]]>` sections are skipped so that
 * markup inside them is not counted.
 *
 * @param {string} xmlData - Complete XML text
 * @param {string} tagName - Stop-node name exactly as written in the source
 *   (any namespace prefix included); close tags are compared against it verbatim
 * @param {number} i - Index of the first character after the stop node's opening tag
 * @returns {{tagContent: string, i: number}|undefined} The raw content and the index of
 *   the closing tag's `>`; `undefined` when the input ends before the node is closed
 * @throws {Error} When a close tag, processing instruction, comment or `<![` section
 *   is not terminated
 */
function readStopNodeData(xmlData, tagName, i) {
  const startIndex = i;
  // Starting at 1 since we already have an open tag
  let openTagCount = 1;

  for (; i < xmlData.length; i++) {
    if (xmlData[i] !== "<") continue;

    if (xmlData[i + 1] === "/") {//close tag
      const closeIndex = findClosingIndex(xmlData, ">", i, `${tagName} is not closed`);
      const closeTagName = xmlData.substring(i + 2, closeIndex).trim();
      if (closeTagName === tagName) {
        openTagCount--;
        if (openTagCount === 0) {
          return {
            tagContent: xmlData.substring(startIndex, i),
            i: closeIndex
          };
        }
      }
      i = closeIndex;
    } else if (xmlData[i + 1] === '?') {
      i = findClosingIndex(xmlData, "?>", i + 1, "StopNode is not closed.");
    } else if (xmlData.startsWith('!--', i + 1)) {
      i = findClosingIndex(xmlData, "-->", i + 3, "StopNode is not closed.");
    } else if (xmlData.startsWith('![', i + 1)) {
      i = findClosingIndex(xmlData, "]]>", i, "StopNode is not closed.") - 2;
    } else {
      // `tagName` and the close tags above keep their namespace prefix, so the
      // open tag must be compared by its raw name as well. The previous call,
      // readTagExp(xmlData, i, '>'), put '>' into the `removeNSPrefix` slot, which
      // stripped the prefix and made nested `ns:tag` elements go uncounted.
      const tagData = readTagExp(xmlData, i, false, '>');
      if (tagData) {
        const isSelfClosing = tagData.tagExp[tagData.tagExp.length - 1] === "/";
        if (tagData.rawTagName === tagName && !isSelfClosing) {
          openTagCount++;
        }
        i = tagData.closeIndex;
      }
    }
  }//end for loop
}
/**
 * Convert a raw value to a boolean or number when parsing is requested.
 *
 * @param {*} val - Raw value
 * @param {boolean} shouldParse - Whether string values should be interpreted
 * @param {object} options - Passed through to `toNumber`
 * @returns {*} `true` / `false` for the trimmed strings 'true' / 'false', the result of
 *   `toNumber(val, options)` for any other string when `shouldParse` is truthy;
 *   otherwise `val` itself when `isExist(val)` holds, else `''`
 */
function parseValue(val, shouldParse, options) {
  const interpret = shouldParse && typeof val === 'string';
  if (!interpret) {
    return isExist(val) ? val : '';
  }

  switch (val.trim()) {
    case 'true':
      return true;
    case 'false':
      return false;
    default:
      // The untrimmed value is handed over on purpose, as before.
      return toNumber(val, options);
  }
}
/**
 * Turn the digits of a numeric character reference into the character itself.
 *
 * @param {string} str - Digits of the reference
 * @param {number} base - Radix used to parse `str`
 * @param {string} prefix - Text that preceded the digits in the source
 * @returns {string} The character for a code point in 0..0x10FFFF; otherwise the
 *   reference rebuilt as `prefix + str + ";"`
 */
function fromCodePoint(str, base, prefix) {
  const codePoint = Number.parseInt(str, base);
  // NaN fails both comparisons, so unparsable digits take the fallback path too.
  const representable = codePoint >= 0 && codePoint <= 0x10FFFF;
  if (!representable) {
    return prefix + str + ";";
  }
  return String.fromCodePoint(codePoint);
}
/**
 * Apply the optional tag-name transformer and sanitise the resulting name.
 *
 * @param {Function} [fn] - Transformer called with the tag name; skipped when falsy
 * @param {string} tagName - Current tag name
 * @param {string} tagExp - Tag expression; kept in step with the name when the two are equal
 * @param {object} options - Forwarded to `sanitizeName`
 * @returns {{tagName: string, tagExp: string}} The sanitised name and the
 *   (possibly updated) expression
 */
function transformTagName(fn, tagName, tagExp, options) {
  let resultingName = tagName;
  let resultingExp = tagExp;

  if (fn) {
    const transformed = fn(tagName);
    // A tag without attributes has expression === name, so both change together.
    if (tagExp === tagName) resultingExp = transformed;
    resultingName = transformed;
  }

  return { tagName: sanitizeName(resultingName, options), tagExp: resultingExp };
}
/**
 * Guard against tag or attribute names that collide with sensitive object properties.
 *
 * @param {string} name - Name to check
 * @param {object} options - Must provide `onDangerousProperty(name)`
 * @returns {string} `name` unchanged, or the value returned by
 *   `options.onDangerousProperty` when the name is listed in `DANGEROUS_PROPERTY_NAMES`
 * @throws {Error} When the name is listed in `criticalProperties`
 */
function sanitizeName(name, options) {
  const isCritical = criticalProperties.includes(name);
  if (isCritical) {
    throw new Error(`[SECURITY] Invalid name: "${name}" is a reserved JavaScript keyword that could cause prototype pollution`);
  }

  return DANGEROUS_PROPERTY_NAMES.includes(name)
    ? options.onDangerousProperty(name)
    : name;
}

View File

@@ -0,0 +1,71 @@
import { buildOptions } from './OptionsBuilder.js';
import OrderedObjParser from './OrderedObjParser.js';
import prettify from './node2json.js';
import { validate } from "../validator.js";
import XmlNode from './xmlNode.js';
/**
 * Public entry point: turns XML text into a JavaScript object, optionally
 * validating the input first.
 */
export default class XMLParser {
  /**
   * @param {object} [options] - User options, normalised through `buildOptions`
   */
  constructor(options) {
    this.externalEntities = {};
    this.options = buildOptions(options);
  }

  /**
   * Parse XML data to JS object
   * @param {string|Uint8Array} xmlData - XML text, or bytes holding UTF-8 encoded XML.
   *   Other non-string values are converted with their own `toString()`.
   * @param {boolean|Object} validationOption - `true` validates with default options,
   *   an object validates with those options, a falsy value skips validation
   * @returns {*} The ordered result when `options.preserveOrder` is set (or when the
   *   parser yields `undefined`); otherwise the result of `prettify`
   * @throws {Error} When `xmlData` is `null`, `undefined` or cannot be converted to text,
   *   or when validation fails (message `msg:line:col`)
   */
  parse(xmlData, validationOption) {
    if (typeof xmlData !== "string") {
      // null/undefined used to crash on `.toString` before this error could be raised.
      if (xmlData == null || typeof xmlData.toString !== "function") {
        throw new Error("XML data is accepted in String or Bytes[] form.");
      }
      // A plain Uint8Array stringifies to "60,97,..." rather than text, so decode it.
      // Types with their own toString (e.g. Node's Buffer) keep their existing behaviour.
      const isPlainByteArray = xmlData instanceof Uint8Array
        && xmlData.toString === Uint8Array.prototype.toString
        && typeof TextDecoder === "function";
      xmlData = isPlainByteArray
        ? new TextDecoder("utf-8", { ignoreBOM: true }).decode(xmlData)
        : xmlData.toString();
    }

    if (validationOption) {
      if (validationOption === true) validationOption = {}; //validate with default options
      const result = validate(xmlData, validationOption);
      if (result !== true) {
        throw new Error(`${result.err.msg}:${result.err.line}:${result.err.col}`);
      }
    }

    const orderedObjParser = new OrderedObjParser(this.options);
    orderedObjParser.addExternalEntities(this.externalEntities);
    const orderedResult = orderedObjParser.parseXml(xmlData);
    if (this.options.preserveOrder || orderedResult === undefined) return orderedResult;
    return prettify(orderedResult, this.options, orderedObjParser.matcher, orderedObjParser.readonlyMatcher);
  }

  /**
   * Add Entity which is not by default supported by this library
   * @param {string} key - Entity name without the surrounding '&' and ';'
   * @param {string} value - Replacement text; must not contain '&'
   * @throws {Error} When `value` contains '&' or `key` contains '&' or ';'
   */
  addEntity(key, value) {
    // This also rejects a value of exactly "&", so no separate check is needed.
    if (value.indexOf("&") !== -1) {
      throw new Error("Entity value can't have '&'");
    }
    if (key.indexOf("&") !== -1 || key.indexOf(";") !== -1) {
      throw new Error("An entity must be set without '&' and ';'. Eg. use '#xD' for '&#xD;'");
    }
    this.externalEntities[key] = value;
  }

  /**
   * Returns a Symbol that can be used to access the metadata
   * property on a node.
   *
   * If Symbol is not available in the environment, an ordinary property is used
   * and the name of the property is here returned.
   *
   * The XMLMetaData property is only present when `captureMetaData`
   * is true in the options.
   */
  static getMetaDataSymbol() {
    return XmlNode.getMetaDataSymbol();
  }
}

173
node_modules/fast-xml-parser/src/xmlparser/node2json.js generated vendored Normal file
View File

@@ -0,0 +1,173 @@
'use strict';
import XmlNode from './xmlNode.js';
import { Matcher } from 'path-expression-matcher';
// Property key under which a node entry carries its metadata, as reported by XmlNode.
const METADATA_SYMBOL = XmlNode.getMetaDataSymbol();
/**
 * Build a copy of an attribute map whose keys no longer carry the attribute prefix.
 *
 * @param {object} attrs - Attributes with prefix (e.g., {"@_class": "code"})
 * @param {string} prefix - Attribute prefix to remove (e.g., "@_")
 * @returns {object} Attributes without prefix (e.g., {"class": "code"}). A new empty
 *   object when `attrs` is not an object; `attrs` itself when `prefix` is empty.
 */
function stripAttributePrefix(attrs, prefix) {
  const isObjectLike = Boolean(attrs) && typeof attrs === 'object';
  if (!isObjectLike) return {};
  if (!prefix) return attrs;

  const stripped = {};
  for (const name in attrs) {
    // Keys lacking the prefix are not expected, but are carried over as they are.
    const bareName = name.startsWith(prefix) ? name.substring(prefix.length) : name;
    stripped[bareName] = attrs[name];
  }
  return stripped;
}
/**
 * Convert the ordered node array into a compact object by delegating to `compress`.
 *
 * @param {array} node - Ordered array of node entries
 * @param {any} options - Parser options, forwarded unchanged
 * @param {Matcher} matcher - Path matcher instance, forwarded unchanged
 * @param {Matcher} readonlyMatcher - Second matcher argument, forwarded unchanged
 * @returns {object} Whatever `compress` returns for the given arguments
 */
export default function prettify(node, options, matcher, readonlyMatcher) {
return compress(node, options, matcher, readonlyMatcher);
}
/**
 * Collapse an ordered array of node entries into a plain object.
 *
 * Text entries are concatenated under `options.textNodeName`; every other entry is
 * compressed recursively. Repeated properties become arrays, and `options.isArray`
 * may force a first occurrence into an array as well. While an entry is processed
 * its name (with prefix-free attributes) is pushed on `matcher`, and popped afterwards.
 *
 * @param {array} arr - Ordered node entries
 * @param {object} options - Parser options (`textNodeName`, `attributeNamePrefix`,
 *   `alwaysCreateTextNode`, `jPath`, `isArray`)
 * @param {Matcher} matcher - Path matcher instance that is pushed to / popped from
 * @param {Matcher} readonlyMatcher - Matcher (or its string form when `options.jPath`
 *   is set) that is handed to the `isArray` callback
 * @returns {object} The compressed object
 */
function compress(arr, options, matcher, readonlyMatcher) {
  let text;
  const compressedObj = {}; //This is intended to be a plain object
  for (let i = 0; i < arr.length; i++) {
    const tagObj = arr[i];
    const property = propName(tagObj);
    if (property === undefined) continue;

    if (property === options.textNodeName) {
      if (text === undefined) text = tagObj[property];
      else text += "" + tagObj[property];
      continue;
    }

    // Entries with a falsy value are ignored. Skip them BEFORE touching the matcher:
    // pushing here without the pop below would leave the path one level too deep
    // for every following sibling.
    if (!tagObj[property]) continue;

    // Push current property to matcher WITH RAW ATTRIBUTES (no prefix)
    const rawAttrs = stripAttributePrefix(
      tagObj[":@"] || {},
      options.attributeNamePrefix
    );
    matcher.push(property, rawAttrs);

    let val = compress(tagObj[property], options, matcher, readonlyMatcher);
    const isLeaf = isLeafTag(val, options);

    if (tagObj[":@"]) {
      assignAttributes(val, tagObj[":@"], readonlyMatcher, options);
    } else if (Object.keys(val).length === 1 && val[options.textNodeName] !== undefined && !options.alwaysCreateTextNode) {
      // A tag holding nothing but text collapses to that text.
      val = val[options.textNodeName];
    } else if (Object.keys(val).length === 0) {
      if (options.alwaysCreateTextNode) val[options.textNodeName] = "";
      else val = "";
    }

    if (tagObj[METADATA_SYMBOL] !== undefined && typeof val === "object" && val !== null) {
      val[METADATA_SYMBOL] = tagObj[METADATA_SYMBOL]; // copy over metadata
    }

    // The own-property check keeps inherited members such as `toString` from
    // being mistaken for an earlier occurrence of the tag.
    if (compressedObj[property] !== undefined && Object.prototype.hasOwnProperty.call(compressedObj, property)) {
      if (!Array.isArray(compressedObj[property])) {
        compressedObj[property] = [compressedObj[property]];
      }
      compressedObj[property].push(val);
    } else {
      //TODO: if a node is not an array, then check if it should be an array
      //also determine if it is a leaf node
      // Pass jPath string or readonlyMatcher based on options.jPath setting
      const jPathOrMatcher = options.jPath ? readonlyMatcher.toString() : readonlyMatcher;
      if (options.isArray(property, jPathOrMatcher, isLeaf)) {
        compressedObj[property] = [val];
      } else {
        compressedObj[property] = val;
      }
    }

    // Pop property from matcher after processing
    matcher.pop();
  }

  // An empty collected string is dropped; any other defined text value is kept.
  if (typeof text === "string") {
    if (text.length > 0) compressedObj[options.textNodeName] = text;
  } else if (text !== undefined) compressedObj[options.textNodeName] = text;
  return compressedObj;
}
/**
 * Return the name of the first own enumerable key of `obj` that is not the
 * attribute holder ":@".
 *
 * @param {object} obj - Node entry
 * @returns {string|undefined} The key, or `undefined` when there is none
 */
function propName(obj) {
  return Object.keys(obj).find((candidate) => candidate !== ":@");
}
/**
 * Copy attributes from `attrMap` onto `obj`, wrapping a value in an array when
 * `options.isArray` asks for it.
 *
 * `options.isArray` is called with the PREFIXED attribute name (e.g. "@_class"), then
 * either the path string "<path>.<name without prefix>" (when `options.jPath` is set)
 * or `readonlyMatcher` itself, followed by `true, true`.
 *
 * @param {object} obj - Target object; receives one property per attribute
 * @param {object} [attrMap] - Prefixed attribute map; nothing happens when falsy
 * @param {object} readonlyMatcher - Matcher describing the owning tag's path
 * @param {object} options - Needs `attributeNamePrefix`, `jPath` and `isArray`
 */
function assignAttributes(obj, attrMap, readonlyMatcher, options) {
  if (!attrMap) return;

  // The key list is captured once, before any callback runs.
  for (const prefixedName of Object.keys(attrMap)) {
    // Strip prefix for matcher path (for isArray callback)
    const bareName = prefixedName.startsWith(options.attributeNamePrefix)
      ? prefixedName.substring(options.attributeNamePrefix.length)
      : prefixedName;

    // Pass jPath string or matcher based on options.jPath setting
    const pathArgument = options.jPath
      ? readonlyMatcher.toString() + "." + bareName
      : readonlyMatcher;

    const asArray = options.isArray(prefixedName, pathArgument, true, true);
    obj[prefixedName] = asArray ? [attrMap[prefixedName]] : attrMap[prefixedName];
  }
}
/**
 * Decide whether a compressed value represents a leaf tag.
 *
 * @param {object} obj - Compressed value of a tag
 * @param {object} options - Needs `textNodeName`
 * @returns {boolean} `true` for an object without keys, or for an object whose single
 *   key is the text node holding a truthy value, a boolean, or `0`
 */
function isLeafTag(obj, options) {
  const { textNodeName } = options;
  const keyCount = Object.keys(obj).length;

  if (keyCount === 0) return true;
  if (keyCount !== 1) return false;

  const text = obj[textNodeName];
  return Boolean(text) || typeof text === "boolean" || text === 0;
}

40
node_modules/fast-xml-parser/src/xmlparser/xmlNode.js generated vendored Normal file
View File

@@ -0,0 +1,40 @@
'use strict';
// Key under which per-node metadata is stored: a Symbol when the runtime
// provides `Symbol`, otherwise an ordinary string property name.
const METADATA_SYMBOL = typeof Symbol === "function"
  ? Symbol("XML Node Metadata")
  : "@@xmlMetadata";
/**
 * One element of the ordered parse tree: a tag name, its children in document
 * order and its attribute map.
 */
export default class XmlNode {
  /**
   * @param {string} tagname - Name of the tag this node represents
   */
  constructor(tagname) {
    this.tagname = tagname;
    this.child = []; //nested tags, text, cdata, comments in order
    this[":@"] = Object.create(null); //attributes map
  }

  /**
   * Append a single `{ key: val }` entry to the children.
   * The key "__proto__" is stored as "#__proto__".
   *
   * @param {string} key
   * @param {*} val
   */
  add(key, val) {
    const safeKey = key === "__proto__" ? "#__proto__" : key;
    this.child.push({ [safeKey]: val });
  }

  /**
   * Append another node as a child entry `{ tagname: children }`, adding the
   * attribute map under ":@" only when it has at least one key.
   * A node named "__proto__" is renamed to "#__proto__" (the node itself is updated).
   *
   * @param {XmlNode} node - Node to append
   * @param {number} [startIndex] - When given, stored as `{ startIndex }` under the metadata key
   */
  addChild(node, startIndex) {
    if (node.tagname === "__proto__") node.tagname = "#__proto__";

    const entry = { [node.tagname]: node.child };
    const attributes = node[":@"];
    if (attributes && Object.keys(attributes).length > 0) {
      entry[":@"] = attributes;
    }

    // if requested, add the startIndex
    if (startIndex !== undefined) {
      // Note: for now the metadata is simply set as a whole. With richer metadata
      // this might have to merge instead: metadata = { ...metadata, startIndex }
      entry[METADATA_SYMBOL] = { startIndex };
    }

    this.child.push(entry);
  }

  /** symbol used for metadata */
  static getMetaDataSymbol() {
    return METADATA_SYMBOL;
  }
}