2 * The contents of this file are subject to the Mozilla Public
3 * License Version 1.1 (the "License"); you may not use this file
4 * except in compliance with the License. You may obtain a copy of
5 * the License at http://www.mozilla.org/MPL/
7 * Software distributed under the License is distributed on an "AS
8 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
9 * implied. See the License for the specific language governing
10 * rights and limitations under the License.
12 * The Original Code is the Sablotron XSLT Processor.
14 * The Initial Developer of the Original Code is Ginger Alliance Ltd.
15 * Portions created by Ginger Alliance are Copyright (C) 2000-2002
16 * Ginger Alliance Ltd. All Rights Reserved.
20 * Alternatively, the contents of this file may be used under the
21 * terms of the GNU General Public License Version 2 or later (the
22 * "GPL"), in which case the provisions of the GPL are applicable
23 * instead of those above. If you wish to allow use of your
24 * version of this file only under the terms of the GPL and not to
25 * allow others to use your version of this file under the MPL,
26 * indicate your decision by deleting the provisions above and
27 * replace them with the notice and other provisions required by
28 * the GPL. If you do not delete the provisions above, a recipient
29 * may use your version of this file under either the MPL or the
33 /*****************************************************************
35 *****************************************************************/
43 /*****************************************************************
47 *****************************************************************/
// RF(CONDITION): returns from the enclosing function (without a value) when
// CONDITION evaluates to false; otherwise execution continues.
// NOTE(review): expands to a bare brace block, so "RF(x); else ..." would not compile.
49 #define RF(CONDITION) {if (!(CONDITION)) return;}
51 // definition of names for various URI-reference parts
58 // definition of slashes in path names
// isSlash(c): true when character c is a path separator, i.e. a forward
// slash or a backslash.
// The argument is parenthesized so that compound expressions (for example a
// conditional expression) expand as intended. It may still be evaluated
// twice, so do not pass an argument with side effects.
#define isSlash(c) ((c) == '/' || (c) == '\\')
62 /*****************************************************************
65 splits a given string into two parts divided by the first occurrence
66 of a delimiter from a given set. If no delimiter is found, returns FALSE
67 and leaves 'string' as is; otherwise shifts 'string' to the character
68 following the delimiter.
70 string the asciiz string to be split
71 delims the asciiz set of delimiters (all of them ASCII chars)
72 part1 first of the two parts
74 string shifted to the other part (past the delimiter)
75 . the delimiter found (or 0)
76 *****************************************************************/
// Splits 'string' at the first character that belongs to 'delims'.
// 'part1' receives the text in front of that character; 'string' is advanced
// past the delimiter only when one was found.
78 char splitBy(const char *&string, const char *delims, Str &part1)
// length of the leading run that contains no delimiter
81 int firstLen = strcspn(string, delims);
// part1 is set to that leading run even when no delimiter follows it
82 part1.nset(string, firstLen);
// c is the character that stopped the scan; it is 0 when the end of the string was reached
83 if (!!(c = string[firstLen]))
84 string += firstLen + 1;
// Array of five Str objects holding the components of one URI; it is indexed
// with U_SCHEME, U_AUTH, U_PATH, U_QUERY and U_FRAG in the functions below.
88 typedef Str FiveStr[5];
// Breaks 'uri' into scheme, authority, path, query and fragment and stores
// each piece in the matching slot of 'parts'.
// Parsing stops early (via RF) as soon as splitBy() reports that no further
// delimiter exists.
90 void splitURI(const char *uri, FiveStr &parts)
94 for (int i = 0; i < 5; i++)
97 // extract the scheme part of the URI
98 if (!splitBy(rest = uri, ":", parts[U_SCHEME]))
// no ':' was found, so whatever splitBy() stored is not a scheme
99 parts[U_SCHEME].empty();
100 // if "//" follows, extract the authority part
101 c = 'A'; // marks the absence of auth
102 if (isSlash(*rest) && isSlash(rest[1]))
// 'slashes' is concatenated with "?#", so it is presumably a string-literal macro (TODO confirm)
103 RF( c = splitBy(rest += 2, slashes"?#", parts[U_AUTH]) );
104 if (isSlash(c) || c == 'A')
// if the authority ended at a slash, step back one character so that the
// path keeps its leading slash
106 RF( c = splitBy(rest -= (isSlash(c)), "?#", parts[U_PATH]) );
109 RF( c = splitBy(rest, "#", parts[U_QUERY]) );
// whatever is left after '#' is the fragment
111 parts[U_FRAG] = (char *) rest;
// Assembles a URI string in 'joined' from the components in 'parts'.
// Empty authority, query and fragment components are omitted together with
// their "//", "?" and "#" markers. The scheme is emitted only when
// 'schemeToo' is set and the scheme is non-empty.
114 void joinURI(DStr &joined, FiveStr &parts, Bool schemeToo)
117 if (schemeToo && !parts[U_SCHEME].isEmpty())
// this is an assignment, so any previous contents of 'joined' are replaced here
118 joined = parts[U_SCHEME] + ":";
119 if (!parts[U_AUTH].isEmpty())
120 joined += Str("//") + parts[U_AUTH]; // add authority
121 joined += parts[U_PATH]; // add path
122 if (!parts[U_QUERY].isEmpty()) // add query
123 joined += Str("?") + parts[U_QUERY];
124 if (!parts[U_FRAG].isEmpty()) // add fragment
125 joined += Str("#") + parts[U_FRAG];
128 /*****************************************************************
131 converts the scheme given as Str to one of the URI_... constants.
132 If the scheme is neither "file" nor "arg" then URI_EXTENSION is
134 *****************************************************************/
// Maps a scheme name to a URIScheme constant; the comparison ignores case.
// The values returned for "file" and "arg" are not visible in this excerpt
// (presumably URI_FILE and URI_ARG -- TODO confirm).
136 URIScheme schemeToURI_(Sit S, Str& scheme)
// "file" is recognized here only while the SAB_FILES_TO_HANDLER flag is not set
138 if (scheme.eqNoCase("file") && !S.hasFlag(SAB_FILES_TO_HANDLER))
142 if (scheme.eqNoCase("arg"))
// every other scheme is reported as an extension scheme
145 return URI_EXTENSION;
149 /*****************************************************************
152 truncates a path after 'howmany'-th slash from the right (1-based).
153 If there are fewer slashes, sets path to empty string and returns
154 FALSE, otherwise returns TRUE.
156 path the path to be truncated
157 howmany # of slashes that disappear in truncation, MINUS 1
159 . TRUE iff that many slashes were found
160 path the truncated path
161 *****************************************************************/
// Truncates 'path' at a slash counted from the right end (see the header
// comment for the exact contract).
163 Bool cutLast(Str& path, int howmany)
166 char *p = (char*) temp;
// scan from the last character toward the start of the string
169 for (i = temp.length() - 1; i >= 0; i--)
// the requested number of slashes has been seen
173 if (slashCount == howmany)
// i stays non-negative only if the scan did not run past the first character
180 return (Bool)(i >= 0);
183 /*****************************************************************
186 merges a relative path with a base path
188 relPath the relative path. The result is returned here.
189 basePath the base path (always absolute)
191 relPath the newly constructed absolute path
192 *****************************************************************/
// Segment predicate: returns TRUE iff 's' is exactly "." (when oneOrTwo is 1)
// or exactly ".." (for any other value of oneOrTwo).
194 Bool segP(Str &s, int oneOrTwo)
196 return (Bool) !strcmp((char *) s, (oneOrTwo == 1 ? "." : ".."));
// Merges 'relPath' with 'basePath' and then removes "." and ".." segments.
// It works in two passes over the combined path, as marked by the comments below.
199 void joinPaths(Str& relPath, const Str& basePath)
203 // append the relPath to all-but-the-last-segment-of-basePath
// absPath starts as a copy of basePath and is truncated by cutLast()
205 Bool endSlash = cutLast(absPath = basePath, 1),
// a separating "/" is inserted only when cutLast() returned FALSE
207 DStr result = absPath + (endSlash? "" : "/") + relPath;
209 // throw out all '.' from the path
210 const char *p = (const char*) result;
// splitBy() yields one segment per iteration and returns 0 once no slash is left
212 while(splitBy(p, slashes, segment))
214 if (!segP(segment, 1))
215 absPath += segment + "/";
// the same "." test is applied once more after the loop, to the final segment
217 if (!segP(segment, 1))
220 // throw out all "something/.." from the path
// lastSeg becomes TRUE when no slash follows the current segment
226 lastSeg = (Bool) !splitBy(p, slashes, segment);
// segments other than ".." are appended to the result
227 if (!segP(segment, 2))
229 result += segment + (lastSeg ? "" : "/");
240 result += segment + (lastSeg ? "" : "/");
// Resolves the reference 'uri' against 'base'.
// The scheme name is stored in 'scheme' and the return value is that scheme
// classified by schemeToURI_(). The URI is joined without its scheme
// (joinURI is called with schemeToo == FALSE).
248 URIScheme makeAbsoluteURI2(Sit S, const char* uri,
249 const char* base, Str& absolute, Str& scheme)
258 // first, break up the URIs into their 5 components
259 splitURI(uri, u_parts);
260 splitURI(base, b_parts);
262 // set u_defined[i] to TRUE if the i-th uri component is nonvoid
263 for (int i = 0; i < 5; i++)
// u_any accumulates whether at least one component of 'uri' is non-empty
264 u_any = (Bool) ((u_defined[i] = (Bool) !u_parts[i].isEmpty()) || u_any);
266 if (!u_any) // all components empty: the reference is to the current document
// reuse the base URI as the result, minus its query and fragment
268 splitURI(base,u_parts);
269 u_parts[U_QUERY].empty(); // query and fragment are NOT inherited from base
270 u_parts[U_FRAG].empty();
272 else // not all components are empty
274 if (!u_defined[U_SCHEME]) // undefined scheme
276 u_parts[U_SCHEME] = b_parts[U_SCHEME]; // inherit scheme from base
277 if (!u_defined[U_AUTH]) // undefined authority
279 u_parts[U_AUTH] = b_parts[U_AUTH]; // inherit authority from base
280 if (!isSlash(u_parts[U_PATH][0])) // path is relative
281 joinPaths(u_parts[U_PATH], b_parts[U_PATH]); // append path to base path
282 // query and fragment stay as they are in 'uri'
285 else // scheme defined, check for paths not starting with '/'
// with a scheme but no authority, a path lacking a leading slash gets one prepended
287 if (!u_defined[U_AUTH] && !isSlash(u_parts[U_PATH][0]))
288 u_parts[U_PATH] = Str("/") + u_parts[U_PATH];
// NOTE(review): 'joined' starts as a copy of the caller's current 'absolute';
// confirm that joinURI() clears it before appending.
291 DStr joined = absolute;
292 joinURI(joined, u_parts, FALSE); // join all components into a URI for return (no scheme)
// 'scheme' receives the resolved scheme name before it is classified
294 return schemeToURI_(S, scheme = u_parts[U_SCHEME]);
298 // URIScheme makeAbsoluteURI(uri, base, absolute)
300 // Merges a (possibly relative) URI reference with a base URI, setting
301 // 'absolute' to the result.
// Convenience wrapper around makeAbsoluteURI2(): resolves 'uri' against
// 'base' and stores the result, including the "scheme:" prefix, in 'absolute'.
303 URIScheme makeAbsoluteURI(Sit S, const char* uri,
304 const char* base, Str& absolute)
// makeAbsoluteURI2() is asked to join the URI without its scheme ...
308 temp = makeAbsoluteURI2(S, uri, base, absolute, scheme);
// ... so the scheme and ':' are prepended here
309 absolute = (scheme + ":") + absolute;
// Splits an absolute URI at its first ':' into 'scheme' and 'rest', and
// returns the scheme classified by schemeToURI_().
314 URIScheme uri2SchemePath(Sit S, const char *absolute, Str& scheme, Str& rest)
// splitBy() advances 'absolute' past the ':' when one is found
316 Bool found = (Bool) !!splitBy(absolute, ":", scheme);
// 'rest' is everything after the scheme delimiter; a leading "//" is kept
318 rest = (char*) absolute;
320 * if (isSlash(*absolute) && isSlash(absolute[1]))
321 * rest = (char*) absolute + 2;
323 * rest = (char*) absolute;
325 return schemeToURI_(S, scheme);
329 /*****************************************************************
332 is a class that holds the machinery needed to retrieve data from
333 a given URI. There are two internally supported URI schemes:
334 file (the plain "file://...")
335 arg (for access to named memory blocks passed to Sablotron)
337 Other schemes are passed to the extending scheme handler (if
338 one has been registered). This way, requests such as http:...
341 The life cycle of a DataLine:
342 Upon construction, no URI is attached yet.
343 Call open() to associate a URI.
344 Repeatedly call save() or get() to retrieve data.
345 Call close() to close the resource.
348 The 'write' data line with the scheme of 'arg' will need to be
349 accessible to the user even after the Processor object is destroyed;
350 it is then freed by 'SablotFreeBuffer'.
351 *****************************************************************/
353 /*****************************************************************
356 This constructor just sets everything to zeroes and such.
357 *****************************************************************/
368 utf16Encoded = FALSE;
372 gotWholeDocument = FALSE;
375 /*****************************************************************
376 DataLine::~DataLine()
378 The destructor asserts that the data line had been closed.
379 *****************************************************************/
// Destructor. It no longer asserts that the line was closed, because the
// object can be destroyed at any time after an error.
381 DataLine::~DataLine()
383 // removing the asserts (can be killed anytime due to error)
384 // assert(mode == DLMODE_CLOSED || mode == DLMODE_NONE);
386 // if there is an outBuf, delete it now
391 /*****************************************************************
394 Opens the data line for a given URI and access mode. Actual
395 data transfer is only done on subsequent get() or save() calls.
396 open() tries to call the extending scheme handler if it cannot
397 handle a request itself.
400 _uri the URI identifier for the resource, including the
401 scheme (e.g. "file:///x.xml")
402 _baseUri the base URI used in case the reference in _uri is
404 _mode the access mode (DLMODE_READ, DLMODE_WRITE)
405 *****************************************************************/
// specErr1(S, code, arg): error reporting helper for DataLine::open().
// When the local 'ignoreErr' is set, the problem is reported through Warn1
// and the enclosing function returns NOT_OK; otherwise Err1 is invoked.
// The macro reads 'ignoreErr' from the scope in which it is expanded.
407 #define specErr1(S, code, arg) \
408 {if (ignoreErr) {Warn1(S,code,arg); return NOT_OK;} else Err1(S,code,arg);}
// Opens the data line on '_uri' in access mode '_mode'.
// The URI is split into scheme and path, and the scheme decides how the
// resource is opened: a file, a named argument buffer, or the external
// scheme handler.
// With 'ignoreErr' set, failures are reported as warnings via specErr1.
410 eFlag DataLine::open(Sit S, const char *_uri, DLAccessMode _mode,
411 StrStrList* argList_, Bool ignoreErr /* = FALSE */)
413 assert(mode == DLMODE_NONE); // the buffer must not be open yet
414 // split _uri into its scheme and the remainder (no base URI is passed to this function)
415 Str strScheme, strPath;
416 scheme = uri2SchemePath(S, _uri, strScheme, strPath);
417 char *name = (char*) strPath;
419 // mode set in the end
420 fullUri = (char*)_uri;
426 if (name[0] == '/' && name[1] == '/')
427 name += 2; // skipping the "//" in front
428 // try to open the file
// NOTE(review): two stdopen() variants appear here with different mode strings;
// presumably they are alternatives selected by a preprocessor conditional
// that is not visible in this excerpt.
430 if (!(f = stdopen(name,_mode == DLMODE_WRITE ? "wb" : "rt")))
432 if (!(f = stdopen(name,_mode == DLMODE_WRITE ? "w" : "r")))
434 specErr1(S, E_FILE_OPEN, name);
435 // set fileIsStd if filename is "stdin", "stdout" or "stderr"
436 fileIsStd = isstd(name);
440 // if opening for read access, get the pointer to the argument contents
441 // plus some extra information
442 if (_mode == DLMODE_READ)
// look the argument up by name in the caller-supplied list
446 value = argList_ -> find(name);
448 specErr1(S, E1_ARG_NOT_FOUND, name);
449 buffer = (char*)*value;
451 // if opening for write access, just allocate a new dynamic block
453 outBuf = new DynBlock;
457 // try the extending scheme handler
458 // ask the handler address from the Processor
459 Processor *proc = S.getProcessor();
461 handler = proc->getSchemeHandler(&handlerUD);
464 // if there is no handler, report unsupported scheme
466 specErr1(S, E1_UNSUPPORTED_SCHEME, strScheme);
// for reads, first try the handler's optional getAll() callback
470 if (_mode == DLMODE_READ && handler -> getAll)
471 handler -> getAll(handlerUD, proc,
472 strScheme, name, &buffer, &count);
// a non-null buffer together with a count other than -1 is treated as success
473 if (buffer && (count != -1))
475 gotWholeDocument = TRUE;
480 // call the handler's open() function, obtaining a handle
481 switch(handler -> open(handlerUD, proc,
482 strScheme, name, &handle))
484 case SH_ERR_UNSUPPORTED_SCHEME: // scheme not supported
485 specErr1(S, E1_UNSUPPORTED_SCHEME, strScheme);
487 specErr1(S, E1_URI_OPEN, strScheme + ":" + strPath);
492 // open successfully completed. Set the new mode.
497 /*****************************************************************
500 closes the resource attached to this data line.
501 *****************************************************************/
// Closes the resource attached to this data line and marks the line as
// DLMODE_CLOSED. Failures to close are reported with E1_URI_CLOSE.
502 eFlag DataLine::close(Sit S)
504 assert(mode != DLMODE_NONE);
513 Err1(S, E1_URI_CLOSE, fullUri);
// a document fetched in one piece lives in handler-owned memory and is
// released through the handler's freeMemory()
521 if (gotWholeDocument)
523 NZ(handler) -> freeMemory(handlerUD, S.getProcessor(), buffer);
// a nonzero result from the handler's close() is treated as an error
527 if(NZ(handler) -> close(handlerUD, S.getProcessor(), handle))
528 Err1(S, E1_URI_CLOSE, fullUri);
532 mode = DLMODE_CLOSED;
536 /*****************************************************************
539 saves an UTF-8 string pointed to by data to the data line.
540 This is the place to perform any recoding, escaping and other operations
541 that require char-by-char scanning of the string.
542 *****************************************************************/
// Measures a string made of 2-byte units that ends with a zero unit.
// The counter starts at 2 and grows by 2 per unit.
// NOTE(review): 'p' is read through a short int*, which assumes suitable
// alignment and a 2-byte short -- confirm for the target platforms.
544 int my_wcslen(const char *p)
547 for (len = 2; *(short int*)p; p += 2, len += 2);
// Writes 'length' bytes starting at 'data' to the data line.
// The destination depends on the scheme: the open file, the in-memory output
// buffer, or the external scheme handler.
551 eFlag DataLine::save(Sit S, const char *data, int length)
553 assert(mode == DLMODE_WRITE); // assume the file open for writing
554 // int length = utf16Encoded ? my_wcslen(data) : strlen(data);
555 switch (scheme) // choose the output procedure
557 case URI_FILE: // file: scheme
559 assert(f); // the file must be open
// NOTE(review): the result of fwrite() is not checked in the visible code
561 fwrite(data, 1, length, f);
563 case URI_ARG: // arg: scheme
565 assert(outBuf); // the output buffer must exist
566 outBuf -> nadd(data, length);
568 case URI_EXTENSION: // external handler
// a nonzero result from the handler's put() is reported as a write error
571 if( NZ(handler) -> put(handlerUD, S.getProcessor(), handle, data, &actual) )
572 Err1(S, E1_URI_WRITE, fullUri);
578 /*................................................................
582 a macro that returns nonzero if the given char* points at a
587 is16 TRUE iff the string is UTF-16
588 ................................................................*/
// pointsAtEnd(p, is16): nonzero when 'p' points at a terminator.
// With is16 set, the terminator is a zero unsigned short read through a cast
// of 'p'; otherwise it is a zero element *p.
590 #define pointsAtEnd(p, is16) ((is16) ? (!*(unsigned short*)(p)) : (!*(p)))
592 /*****************************************************************
595 - retrieves at most 'maxcount' bytes into buffer 'dest'.
596 - input should be NUL-terminated
597 - if a terminating 0 is reached, copying stops
598 *****************************************************************/
// Reads up to 'maxcount' bytes from the data line into 'dest' and returns
// the number of bytes read.
// The source depends on the scheme: the open file, the in-memory buffer, or
// the external scheme handler.
600 int DataLine::get(Sit S, char *dest,int maxcount)
603 assert(mode == DLMODE_READ); // assume the file open for reading
608 assert(f); // the file must be open
609 result = fread(dest,1,maxcount,f);
610 // return the number of bytes read
614 assert(buffer); // the buffer must exist
615 // do a 'strncpy' that shifts dest and bufCurr;
616 // i counts the number of bytes transferred
617 char * copyChar = dest;
// copy until the terminator is reached or maxcount bytes have been transferred
620 (!pointsAtEnd(buffer + bufCurr, utf16Encoded)) && (i < maxcount);
623 *(copyChar++) = buffer[bufCurr++];
627 case URI_EXTENSION: // external handler
// when the handler delivered the whole document at open time, serve the
// data from 'buffer'
629 if (gotWholeDocument)
631 // ugly hack: copied the following from above
632 assert(buffer); // the buffer must exist
633 char * copyChar = dest;
636 (!pointsAtEnd(buffer + bufCurr, utf16Encoded)) && (i < maxcount);
639 *(copyChar++) = buffer[bufCurr++];
// otherwise ask the handler; 'actual' is passed in holding maxcount
645 int actual = maxcount;
646 if( NZ(handler) -> get(handlerUD, S.getProcessor(), handle, dest, &actual) )
648 S.message( MT_ERROR, E1_URI_READ, fullUri, "" );
655 // need to NUL terminate in order to prevent C string
656 // functions running off the end of the buffer
658 // assignment assumes that the passed in dest is allocated
659 // one bigger than maxcount
661 return result; // return the number of bytes read
664 /*****************************************************************
667 returns the pointer to the output buffer which may be used after
668 all processing is finished (remains allocated along with the
670 *****************************************************************/
// Returns the dynamic block that collects output written to an arg: data line.
// It is valid only while the line is open for writing with scheme URI_ARG.
672 DynBlock* DataLine::getOutBuffer()
674 // check that the output buffer exists and that we're open for write
675 assert(mode == DLMODE_WRITE && scheme == URI_ARG);
676 return NZ(outBuf); // -> getPointer();
// Moves a data line that was never opened straight to the closed state.
// How '_uri' is stored is not visible in this excerpt.
679 eFlag DataLine::setURIAndClose(Sit S, const char *_uri)
681 assert( mode == DLMODE_NONE );
682 mode = DLMODE_CLOSED;
// Forwards a message of the given type and code, with its two string
// arguments, to the situation object's message() method.
688 void DataLine::report(Sit S, MsgType type, MsgCode code, const Str& arg1, const Str& arg2)
690 S.message(type, code, arg1, arg2);